From b281e3cb853d98ed1c7355c245289df52428257b Mon Sep 17 00:00:00 2001 From: Francois Lagunas Date: Thu, 1 Apr 2021 18:13:51 +0200 Subject: [PATCH] Deleting deprecated files. --- .gitignore | 6 + analysis/files/checkpoints.txt | 1 - analysis/files/files.txt | 154 - .../removed_files_2021-03-30_11_27_19.json | 879 + analysis/files/results.json | 37702 -------------- analysis/files/results/results.json | 1 - analysis/files/results/results10.json | 1 - analysis/files/results/results11.json | 1 - analysis/files/results/results12.json | 1 - analysis/files/results/results13.json | 1 - analysis/files/results/results14.json | 1 - analysis/files/results/results15.json | 1 - analysis/files/results/results16.json | 1 - analysis/files/results/results2.json | 1 - analysis/files/results/results3.json | 1 - analysis/files/results/results4.json | 1 - analysis/files/results/results5.json | 1 - analysis/files/results/results6.json | 1 - analysis/files/results/results8.json | 1 - analysis/files/results/results9.json | 1 - analysis/files/results/results_back.json | 1 - analysis/files/test.json | 40061 --------------- analysis/files/test2.json | 40488 ---------------- 23 files changed, 885 insertions(+), 118422 deletions(-) delete mode 100644 analysis/files/checkpoints.txt delete mode 100644 analysis/files/files.txt create mode 100644 analysis/files/removed_files_2021-03-30_11_27_19.json delete mode 100644 analysis/files/results.json delete mode 100644 analysis/files/results/results.json delete mode 100644 analysis/files/results/results10.json delete mode 100644 analysis/files/results/results11.json delete mode 100644 analysis/files/results/results12.json delete mode 100644 analysis/files/results/results13.json delete mode 100644 analysis/files/results/results14.json delete mode 100644 analysis/files/results/results15.json delete mode 100644 analysis/files/results/results16.json delete mode 100644 analysis/files/results/results2.json delete mode 100644 analysis/files/results/results3.json delete mode 100644 analysis/files/results/results4.json delete mode 100644 analysis/files/results/results5.json delete mode 100644 analysis/files/results/results6.json delete mode 100644 analysis/files/results/results8.json delete mode 100644 analysis/files/results/results9.json delete mode 100644 analysis/files/results/results_back.json delete mode 100644 analysis/files/test.json delete mode 100644 analysis/files/test2.json diff --git a/.gitignore b/.gitignore index e37ffb72..81ad97e7 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,9 @@ __pycache__/ # Jupyter Notebook .ipynb_checkpoints + +# wandb information directory +wandb + +# backup directories +back \ No newline at end of file diff --git a/analysis/files/checkpoints.txt b/analysis/files/checkpoints.txt deleted file mode 100644 index f505faca..00000000 --- a/analysis/files/checkpoints.txt +++ /dev/null @@ -1 +0,0 @@ -/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-140000 \ No newline at end of file diff --git a/analysis/files/files.txt b/analysis/files/files.txt deleted file mode 100644 index 1e7f3e62..00000000 --- a/analysis/files/files.txt +++ /dev/null @@ -1,154 +0,0 @@ -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-20000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-20000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000 -/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000 -/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320 -/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000 -/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 -/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000 diff --git a/analysis/files/removed_files_2021-03-30_11_27_19.json b/analysis/files/removed_files_2021-03-30_11_27_19.json new file mode 100644 index 00000000..b1b4d094 --- /dev/null +++ b/analysis/files/removed_files_2021-03-30_11_27_19.json @@ -0,0 +1,879 @@ +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch6_--5f772c87c5edbc85/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch3/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch3_--37a1c030322f0ff3/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--5fcfb7ff678f0d71/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--63d7a49c946fed3/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--7eaf27127735c06f/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test_final_fine_tune/fine_tuned_hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch2/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch2_--34ff303320644b64/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-140000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-170000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-160000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-190000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-220000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-200000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-210000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-180000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_60_d0.25/checkpoint-150000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--3006fe9afa215f73/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test5/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test5___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test5___dpm-sigmoied_threshold--35f0c1d86d528754/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--17cd29ad8a563746/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-47500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad4___dpm-sigmoied_threshold:1d_alt_ap--754f92d6579864ca/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-47500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-52500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--696f4785b3ba52e7/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-52500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_60_d0.25/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-47500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-52500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold:--d169c0ebde721c7/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-27500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-32500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-42500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-37500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-7500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-12500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-17500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-22500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-2500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-52500/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-__data_2to__devel_data__nn_pruning__output__squad_test4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test4___dpm-sigmoied_threshold--6b3d26fc7262a898/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--75942d701c2bed7e/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6cb2db64e9a885f1/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_fullpatch4/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch4_--6b7cbdb694e8fe5f/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7ebf7572d80fe282/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--7fe43555f854fbb6/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--45bf1e1da1b7299c/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-sig--51ab88e6fe9e0cb/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--5742d4278f871b20/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_8_mvp_lt/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_8_mvp_lt___dpm-si--3dbebc278974335e/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test_9_gelupatch/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test_9_fullpatch___--4520aaa044f7c325/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-115000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-135000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl20/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-135000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-115000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-115000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl5/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte6_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-140000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-115000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-135000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_mnop-aloxatel__bert-base-mnli_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl30/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-125000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-120000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-130000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-115000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-110000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-135000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/mnli_test2/hp_od-output__mnli_test2___pdtbs32_pdebs128_nte12_ws12000_rn-output__mnli_test2___fw4_rfl40/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test6/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test6___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test6___dpm-sigmoied_threshold:--4b86dc18da73d79/checkpoint-20000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-85000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-45000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-10000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-80000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-5000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-75000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-50000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-60000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-40000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-105000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-100000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-15000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-95000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-25000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-55000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-90000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-30000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-70000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-65000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-35000/pytorch_model.bin +/data_2to/devel_data/nn_pruning/output/squad_test7/hp_od-__data_2to__devel_data__nn_pruning__output__squad_test7___es-steps_nte20_ls250_stl50_est5000_rn-__data_2to__devel_data__nn_pruning__output__squad_test7___dpm-sigmoied_threshold--58e126daa38a2a47/checkpoint-20000/pytorch_model.bin diff --git a/analysis/files/results.json b/analysis/files/results.json deleted file mode 100644 index 6656e6d2..00000000 --- a/analysis/files/results.json +++ /dev/null @@ -1,37702 +0,0 @@ -{ - "base_speed_report": { - "cuda_eval_elapsed_time": 38.594393005371096, - "eval_elapsed_time": 45.63197132572532 - }, - "checkpoints": { - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.15799432355723, - "f1": 86.94169166073364 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 22.747020225524903, - "eval_elapsed_time": 29.958857133984566 - }, - "speedup": 1.6966790648941144, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 427776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 2045184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 394752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1708032, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 182784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 245760, - "linear_dense_total": 4718592, - "linear_nnz": 428544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 626688, - "linear_dense_total": 4718592, - "linear_nnz": 738816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1955328, - "linear_dense_total": 4718592, - "linear_nnz": 2424576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 579840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1923072, - "linear_dense_total": 4718592, - "linear_nnz": 2502912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 539904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1837056, - "linear_dense_total": 4718592, - "linear_nnz": 2376960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 424704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1777152, - "linear_dense_total": 4718592, - "linear_nnz": 2201856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1468416, - "linear_dense_total": 4718592, - "linear_nnz": 1907712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 428544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1152000, - "linear_dense_total": 4718592, - "linear_nnz": 1580544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 397824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1095168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 235776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 291840, - "linear_dense_total": 4718592, - "linear_nnz": 527616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19932672, - "linear_sparsity": 76.53175636574075, - "linear_total": 84934656, - "nnz": 43891202, - "total": 108893186, - "total_sparsity": 59.6933438975695 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.2620624408704, - "f1": 86.97825692623259 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 16.405798454284668, - "eval_elapsed_time": 23.622337056789547 - }, - "speedup": 2.3524848920286154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1493248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 565504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1191936, - "linear_dense_total": 4718592, - "linear_nnz": 1757440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 346368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167424, - "linear_dense_total": 4718592, - "linear_nnz": 513792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 220160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423936, - "linear_dense_total": 4718592, - "linear_nnz": 644096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 646400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1382400, - "linear_dense_total": 4718592, - "linear_nnz": 2028800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 937728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1359360, - "linear_dense_total": 4718592, - "linear_nnz": 2297088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 846592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1423872, - "linear_dense_total": 4718592, - "linear_nnz": 2270464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 688640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2081792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 744704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1070592, - "linear_dense_total": 4718592, - "linear_nnz": 1815296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 831488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 781824, - "linear_dense_total": 4718592, - "linear_nnz": 1613312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 522496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 969472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 594944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18079744, - "linear_sparsity": 78.7133487654321, - "linear_total": 84934656, - "nnz": 42038274, - "total": 108893186, - "total_sparsity": 61.39494531824976 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.07723643002453 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 19.890604362487792, - "eval_elapsed_time": 27.08285549096763 - }, - "speedup": 1.9403328477116193, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 721408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2214400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 635136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1755648, - "linear_dense_total": 4718592, - "linear_nnz": 2390784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 484608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 682752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 313600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 666624, - "linear_dense_total": 4718592, - "linear_nnz": 980224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 972032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1878528, - "linear_dense_total": 4718592, - "linear_nnz": 2850560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1256448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1932288, - "linear_dense_total": 4718592, - "linear_nnz": 3188736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1260544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1889280, - "linear_dense_total": 4718592, - "linear_nnz": 3149824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1784832, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2455040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1027584, - "linear_dense_total": 4718592, - "linear_nnz": 2015744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 903424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1550080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 250368, - "linear_dense_total": 4718592, - "linear_nnz": 886784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25271040, - "linear_sparsity": 70.2464916087963, - "linear_total": 84934656, - "nnz": 49229570, - "total": 108893186, - "total_sparsity": 54.79095450471988 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.99053926206244, - "f1": 87.56439208763325 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 2, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 19.37784966278076, - "eval_elapsed_time": 26.613120706751943 - }, - "speedup": 1.9916757368336773, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 684800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2007552, - "linear_dense_total": 4718592, - "linear_nnz": 2692352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 646656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2019840, - "linear_dense_total": 4718592, - "linear_nnz": 2666496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 274944, - "linear_dense_total": 4718592, - "linear_nnz": 707072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 277760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 794112, - "linear_dense_total": 4718592, - "linear_nnz": 1071872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 691712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2239488, - "linear_dense_total": 4718592, - "linear_nnz": 2931200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1149184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2211840, - "linear_dense_total": 4718592, - "linear_nnz": 3361024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1007872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2158080, - "linear_dense_total": 4718592, - "linear_nnz": 3165952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 997376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2073600, - "linear_dense_total": 4718592, - "linear_nnz": 3070976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 911872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1732608, - "linear_dense_total": 4718592, - "linear_nnz": 2644480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 944640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304064, - "linear_dense_total": 4718592, - "linear_nnz": 2248704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 763136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 751104, - "linear_dense_total": 4718592, - "linear_nnz": 1514240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 526080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 839424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26913792, - "linear_sparsity": 68.31235532407408, - "linear_total": 84934656, - "nnz": 50872322, - "total": 108893186, - "total_sparsity": 53.282364242699266 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 12, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.58426699451658 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 15 - }, - "speed": { - "cuda_eval_elapsed_time": 15.051653835296632, - "eval_elapsed_time": 22.226274209097028 - }, - "speedup": 2.56412972472606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 749568, - "linear_dense_total": 4718592, - "linear_nnz": 1209344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 488192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006080, - "linear_dense_total": 4718592, - "linear_nnz": 1494272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 311040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 460032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 550144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1085952, - "linear_dense_total": 4718592, - "linear_nnz": 1636096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1969664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1198080, - "linear_dense_total": 4718592, - "linear_nnz": 1746944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1128960, - "linear_dense_total": 4718592, - "linear_nnz": 1782272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 593920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867840, - "linear_dense_total": 4718592, - "linear_nnz": 1461760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669696, - "linear_dense_total": 4718592, - "linear_nnz": 1391616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 387072, - "linear_dense_total": 4718592, - "linear_nnz": 754688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 373760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 531968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14957824, - "linear_sparsity": 82.38902150848766, - "linear_total": 84934656, - "nnz": 38916354, - "total": 108893186, - "total_sparsity": 64.26190156654981 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.96877956480606, - "f1": 86.71968503618079 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 15, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.359982524871826, - "eval_elapsed_time": 22.516427854076028 - }, - "speedup": 2.512658653281453, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1010688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 518912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 852480, - "linear_dense_total": 4718592, - "linear_nnz": 1371392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 455936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 212992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 608768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 981504, - "linear_dense_total": 4718592, - "linear_nnz": 1590272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 869888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1895936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 775936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1093632, - "linear_dense_total": 4718592, - "linear_nnz": 1869568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 618752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044480, - "linear_dense_total": 4718592, - "linear_nnz": 1663232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 754176, - "linear_dense_total": 4718592, - "linear_nnz": 1383424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 707584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 1295872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 463104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 808704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 376064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 515840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14334976, - "linear_sparsity": 83.1223476080247, - "linear_total": 84934656, - "nnz": 38293506, - "total": 108893186, - "total_sparsity": 64.83388225963009 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 30, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.74172185430463, - "f1": 86.69521763053608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.306304389953613, - "eval_elapsed_time": 24.480814102105796 - }, - "speedup": 2.230077094204775, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 606208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1185792, - "linear_dense_total": 4718592, - "linear_nnz": 1792000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 378112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 525568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419328, - "linear_dense_total": 4718592, - "linear_nnz": 626688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 625664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1348608, - "linear_dense_total": 4718592, - "linear_nnz": 1974272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 910592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1320960, - "linear_dense_total": 4718592, - "linear_nnz": 2231552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 828672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380864, - "linear_dense_total": 4718592, - "linear_nnz": 2209536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 765440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 2046464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1003008, - "linear_dense_total": 4718592, - "linear_nnz": 1764096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 792832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 780288, - "linear_dense_total": 4718592, - "linear_nnz": 1573120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 986880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 389888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 572672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17711872, - "linear_sparsity": 79.14647231867285, - "linear_total": 84934656, - "nnz": 41670402, - "total": 108893186, - "total_sparsity": 61.73277361909495 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.84578997161779, - "f1": 86.78133258210022 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.32754041290283, - "eval_elapsed_time": 24.51584801170975 - }, - "speedup": 2.2273439903006693, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 465664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 938496, - "linear_dense_total": 4718592, - "linear_nnz": 1404160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 584192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1182720, - "linear_dense_total": 4718592, - "linear_nnz": 1766912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 370432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 516352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 615680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 615680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1345536, - "linear_dense_total": 4718592, - "linear_nnz": 1961216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 895488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1314816, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 812032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1377792, - "linear_dense_total": 4718592, - "linear_nnz": 2189824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 755456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1282560, - "linear_dense_total": 4718592, - "linear_nnz": 2038016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 739840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 998400, - "linear_dense_total": 4718592, - "linear_nnz": 1738240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 797440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 774144, - "linear_dense_total": 4718592, - "linear_nnz": 1571584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430080, - "linear_dense_total": 4718592, - "linear_nnz": 943872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 563968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17520128, - "linear_sparsity": 79.37222704475309, - "linear_total": 84934656, - "nnz": 41478658, - "total": 108893186, - "total_sparsity": 61.90885809879785 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.90823084200568, - "f1": 88.13888839423888 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 40.403957000732426, - "eval_elapsed_time": 47.70582241564989 - }, - "speedup": 0.9552131986644643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2151936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4713984, - "linear_dense_total": 4718592, - "linear_nnz": 6865920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2299648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4709376, - "linear_dense_total": 4718592, - "linear_nnz": 7009024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4638720, - "linear_dense_total": 4718592, - "linear_nnz": 6924288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2312448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4687872, - "linear_dense_total": 4718592, - "linear_nnz": 7000320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4707840, - "linear_dense_total": 4718592, - "linear_nnz": 7037952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4710912, - "linear_dense_total": 4718592, - "linear_nnz": 7041024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2324992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4704768, - "linear_dense_total": 4718592, - "linear_nnz": 7029760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2337280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4706304, - "linear_dense_total": 4718592, - "linear_nnz": 7043584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2321664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4684800, - "linear_dense_total": 4718592, - "linear_nnz": 7006464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2342400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4683264, - "linear_dense_total": 4718592, - "linear_nnz": 7025664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2296576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4654080, - "linear_dense_total": 4718592, - "linear_nnz": 6950656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2259200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6905600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 83840256, - "linear_sparsity": 1.288519965277779, - "linear_total": 84934656, - "nnz": 107798786, - "total": 108893186, - "total_sparsity": 1.005021563057218 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.21192052980132, - "f1": 86.2154189083501 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.830447120666506, - "eval_elapsed_time": 47.13309640903026 - }, - "speedup": 0.968967104196677, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1914624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4678656, - "linear_dense_total": 4718592, - "linear_nnz": 6593280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2103296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4669440, - "linear_dense_total": 4718592, - "linear_nnz": 6772736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2053632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4353024, - "linear_dense_total": 4718592, - "linear_nnz": 6406656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2100480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4538880, - "linear_dense_total": 4718592, - "linear_nnz": 6639360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2239232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6885632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2219520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6876672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2216448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6873600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2226176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4615680, - "linear_dense_total": 4718592, - "linear_nnz": 6841856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2190848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4595712, - "linear_dense_total": 4718592, - "linear_nnz": 6786560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2261760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4549632, - "linear_dense_total": 4718592, - "linear_nnz": 6811392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2178048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4431360, - "linear_dense_total": 4718592, - "linear_nnz": 6609408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2049792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4349952, - "linear_dense_total": 4718592, - "linear_nnz": 6399744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 80496896, - "linear_sparsity": 5.224910783179015, - "linear_total": 84934656, - "nnz": 104455426, - "total": 108893186, - "total_sparsity": 4.075333051601593 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.6699349353281 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.58176746368408, - "eval_elapsed_time": 46.91258597606793 - }, - "speedup": 0.975054816356574, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2354176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7072768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4715520, - "linear_dense_total": 4718592, - "linear_nnz": 7074816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4717056, - "linear_dense_total": 4718592, - "linear_nnz": 7076352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 84922880, - "linear_sparsity": 0.013864776234573384, - "linear_total": 84934656, - "nnz": 108881410, - "total": 108893186, - "total_sparsity": 0.010814267111258768 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.94701986754967, - "f1": 86.06827252573265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 14.216132064819336, - "eval_elapsed_time": 21.342612544074655 - }, - "speedup": 2.7148307872632, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1044480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 809472, - "linear_dense_total": 4718592, - "linear_nnz": 1177088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 276224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 411392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 178176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 251904, - "linear_dense_total": 4718592, - "linear_nnz": 430080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 492032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 958464, - "linear_dense_total": 4718592, - "linear_nnz": 1450496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 733696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 918528, - "linear_dense_total": 4718592, - "linear_nnz": 1652224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 461056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1050624, - "linear_dense_total": 4718592, - "linear_nnz": 1511680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 580096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 953856, - "linear_dense_total": 4718592, - "linear_nnz": 1533952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 764928, - "linear_dense_total": 4718592, - "linear_nnz": 1227520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 624384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 1195776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 351744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348672, - "linear_dense_total": 4718592, - "linear_nnz": 700416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 339968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 479744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12814848, - "linear_sparsity": 84.912109375, - "linear_total": 84934656, - "nnz": 36773378, - "total": 108893186, - "total_sparsity": 66.22986308803564 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.06717123935667, - "f1": 85.28341140334766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 13.584790561676026, - "eval_elapsed_time": 20.705443068873137 - }, - "speedup": 2.8410002222816386, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 384768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 502272, - "linear_dense_total": 4718592, - "linear_nnz": 887040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 355840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1057792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 115200, - "linear_dense_total": 4718592, - "linear_nnz": 371712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 150016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 221184, - "linear_dense_total": 4718592, - "linear_nnz": 371200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1285888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 672256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1497088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 418560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 932352, - "linear_dense_total": 4718592, - "linear_nnz": 1350912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1395712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 498944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655872, - "linear_dense_total": 4718592, - "linear_nnz": 1154816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 562176, - "linear_dense_total": 4718592, - "linear_nnz": 1059840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 297216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 609024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 316416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 119808, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11477248, - "linear_sparsity": 86.4869671103395, - "linear_total": 84934656, - "nnz": 35435778, - "total": 108893186, - "total_sparsity": 67.45822277621669 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.80132450331126, - "f1": 87.48291010744668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.270113506317138, - "eval_elapsed_time": 25.450434973929077 - }, - "speedup": 2.1124331270315624, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 627712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 1908736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1548288, - "linear_dense_total": 4718592, - "linear_nnz": 2145280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 634368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 268288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 827392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1709568, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1180672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1740288, - "linear_dense_total": 4718592, - "linear_nnz": 2920960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1204224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600512, - "linear_dense_total": 4718592, - "linear_nnz": 2516992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 909312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1242624, - "linear_dense_total": 4718592, - "linear_nnz": 2151936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 917504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 972288, - "linear_dense_total": 4718592, - "linear_nnz": 1889792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 856064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1398272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 611328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 247296, - "linear_dense_total": 4718592, - "linear_nnz": 858624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22657536, - "linear_sparsity": 73.32356770833333, - "linear_total": 84934656, - "nnz": 46572775, - "total": 108893186, - "total_sparsity": 57.23077199706509 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.27436140018922, - "f1": 87.70461789964966 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.375184078216552, - "eval_elapsed_time": 25.600778602063656 - }, - "speedup": 2.1003540884863323, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 1984512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 592896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2164224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 667648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 869376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 880640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 2625536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1230848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 2992640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1214464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 2940928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 906240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2535936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2213376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 935936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 1923584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 872448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 1419264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 883712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23220736, - "linear_sparsity": 72.66046971450618, - "linear_total": 84934656, - "nnz": 47136529, - "total": 108893186, - "total_sparsity": 56.713059162397904 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.20529801324503, - "f1": 87.11181141207972 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.401466148376464, - "eval_elapsed_time": 24.569451212882996 - }, - "speedup": 2.2178816817094407, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 838656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287232, - "linear_dense_total": 4718592, - "linear_nnz": 1125888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 692224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1188352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 573952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 293888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 155136, - "linear_dense_total": 4718592, - "linear_nnz": 449024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1089536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1694720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1291264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 671232, - "linear_dense_total": 4718592, - "linear_nnz": 1962496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1384448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 728064, - "linear_dense_total": 4718592, - "linear_nnz": 2112512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 662016, - "linear_dense_total": 4718592, - "linear_nnz": 1783296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1127424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 505344, - "linear_dense_total": 4718592, - "linear_nnz": 1632768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 942080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 391680, - "linear_dense_total": 4718592, - "linear_nnz": 1333760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 982016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 222720, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 729600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15791104, - "linear_sparsity": 81.40793788580247, - "linear_total": 84934656, - "nnz": 39702836, - "total": 108893186, - "total_sparsity": 63.53965068117302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.80794701986756, - "f1": 86.74156854566804 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 15.618790004730226, - "eval_elapsed_time": 22.811819266993552 - }, - "speedup": 2.471023235070233, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 518144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1344512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090560, - "linear_dense_total": 4718592, - "linear_nnz": 1606656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 324608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 209920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 555520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1204224, - "linear_dense_total": 4718592, - "linear_nnz": 1842176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1184256, - "linear_dense_total": 4718592, - "linear_nnz": 2097664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 2056192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 664576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1201152, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 935424, - "linear_dense_total": 4718592, - "linear_nnz": 1565184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1486336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 415744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 428544, - "linear_dense_total": 4718592, - "linear_nnz": 844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 592896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16329216, - "linear_sparsity": 80.7743778935185, - "linear_total": 84934656, - "nnz": 40239113, - "total": 108893186, - "total_sparsity": 63.04717083032174 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.18070009460737, - "f1": 85.6109462422114 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 13.485522186279297, - "eval_elapsed_time": 20.651509277056903 - }, - "speedup": 2.86191312967017, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 907264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1074176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 377344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 146432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 361472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 402432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 681984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1508352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 405504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1328640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1422848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 449536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1094656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 577536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1102848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 628224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 320512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 434176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11493376, - "linear_sparsity": 86.46797839506173, - "linear_total": 84934656, - "nnz": 35398714, - "total": 108893186, - "total_sparsity": 67.49225980035152 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.4484389782403, - "f1": 86.3547925481507 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 150 - }, - "speed": { - "cuda_eval_elapsed_time": 29.783737594604492, - "eval_elapsed_time": 37.12324417894706 - }, - "speedup": 1.2958210124830911, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 30729, - "linear_attention_total": 2359296, - "linear_dense_nnz": 624455, - "linear_dense_total": 4718592, - "linear_nnz": 655184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 77742, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655389, - "linear_dense_total": 4718592, - "linear_nnz": 733131, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 27892, - "linear_attention_total": 2359296, - "linear_dense_nnz": 61389, - "linear_dense_total": 4718592, - "linear_nnz": 89281, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 20781, - "linear_attention_total": 2359296, - "linear_dense_nnz": 51322, - "linear_dense_total": 4718592, - "linear_nnz": 72103, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 70206, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660173, - "linear_dense_total": 4718592, - "linear_nnz": 730379, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 106339, - "linear_attention_total": 2359296, - "linear_dense_nnz": 628112, - "linear_dense_total": 4718592, - "linear_nnz": 734451, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 81845, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574018, - "linear_dense_total": 4718592, - "linear_nnz": 655863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 68554, - "linear_attention_total": 2359296, - "linear_dense_nnz": 537752, - "linear_dense_total": 4718592, - "linear_nnz": 606306, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 58217, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434629, - "linear_dense_total": 4718592, - "linear_nnz": 492846, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 65705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313684, - "linear_dense_total": 4718592, - "linear_nnz": 379389, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 39483, - "linear_attention_total": 2359296, - "linear_dense_nnz": 203724, - "linear_dense_total": 4718592, - "linear_nnz": 243207, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 46007, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73599, - "linear_dense_total": 4718592, - "linear_nnz": 119606, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 5511746, - "linear_sparsity": 93.51060419906804, - "linear_total": 84934656, - "nnz": 29470276, - "total": 108893186, - "total_sparsity": 72.93652882926945 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.39829706717124, - "f1": 85.66626983371626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 225 - }, - "speed": { - "cuda_eval_elapsed_time": 27.713626304626466, - "eval_elapsed_time": 35.06419681990519 - }, - "speedup": 1.3926143255719736, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 18728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446655, - "linear_dense_total": 4718592, - "linear_nnz": 465383, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 63059, - "linear_attention_total": 2359296, - "linear_dense_nnz": 464338, - "linear_dense_total": 4718592, - "linear_nnz": 527397, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 21311, - "linear_attention_total": 2359296, - "linear_dense_nnz": 43332, - "linear_dense_total": 4718592, - "linear_nnz": 64643, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 17233, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36806, - "linear_dense_total": 4718592, - "linear_nnz": 54039, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 53761, - "linear_attention_total": 2359296, - "linear_dense_nnz": 462731, - "linear_dense_total": 4718592, - "linear_nnz": 516492, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 84624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430348, - "linear_dense_total": 4718592, - "linear_nnz": 514972, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 58345, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384869, - "linear_dense_total": 4718592, - "linear_nnz": 443214, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 50615, - "linear_attention_total": 2359296, - "linear_dense_nnz": 346306, - "linear_dense_total": 4718592, - "linear_nnz": 396921, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 41344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 277660, - "linear_dense_total": 4718592, - "linear_nnz": 319004, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 47420, - "linear_attention_total": 2359296, - "linear_dense_nnz": 201763, - "linear_dense_total": 4718592, - "linear_nnz": 249183, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 27562, - "linear_attention_total": 2359296, - "linear_dense_nnz": 133500, - "linear_dense_total": 4718592, - "linear_nnz": 161062, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 34151, - "linear_attention_total": 2359296, - "linear_dense_nnz": 47554, - "linear_dense_total": 4718592, - "linear_nnz": 81705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 3794015, - "linear_sparsity": 95.5330189363456, - "linear_total": 84934656, - "nnz": 27752545, - "total": 108893186, - "total_sparsity": 74.51397463933142 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.40018921475875, - "f1": 88.66263407974378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 37.63941863250732, - "eval_elapsed_time": 44.979358388110995 - }, - "speedup": 1.0253716557683228, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 158912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1993831, - "linear_dense_total": 4718592, - "linear_nnz": 2152743, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 234395, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2030737, - "linear_dense_total": 4718592, - "linear_nnz": 2265132, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 134277, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440264, - "linear_dense_total": 4718592, - "linear_nnz": 574541, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 63309, - "linear_attention_total": 2359296, - "linear_dense_nnz": 269756, - "linear_dense_total": 4718592, - "linear_nnz": 333065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 301048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2114464, - "linear_dense_total": 4718592, - "linear_nnz": 2415512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 358791, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2106776, - "linear_dense_total": 4718592, - "linear_nnz": 2465567, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 398673, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2058594, - "linear_dense_total": 4718592, - "linear_nnz": 2457267, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 367333, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2043244, - "linear_dense_total": 4718592, - "linear_nnz": 2410577, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 344288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1862492, - "linear_dense_total": 4718592, - "linear_nnz": 2206780, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 304514, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514517, - "linear_dense_total": 4718592, - "linear_nnz": 1819031, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 265513, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1099308, - "linear_dense_total": 4718592, - "linear_nnz": 1364821, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 201714, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627276, - "linear_dense_total": 4718592, - "linear_nnz": 828990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21294026, - "linear_sparsity": 74.92893124804085, - "linear_total": 84934656, - "nnz": 45252556, - "total": 108893186, - "total_sparsity": 58.4431701722824 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.40699359564026 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 300 - }, - "speed": { - "cuda_eval_elapsed_time": 25.440285942077637, - "eval_elapsed_time": 32.748252402991056 - }, - "speedup": 1.5170581452285046, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 13195, - "linear_attention_total": 2359296, - "linear_dense_nnz": 344662, - "linear_dense_total": 4718592, - "linear_nnz": 357857, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 53357, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352125, - "linear_dense_total": 4718592, - "linear_nnz": 405482, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 18747, - "linear_attention_total": 2359296, - "linear_dense_nnz": 34723, - "linear_dense_total": 4718592, - "linear_nnz": 53470, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 15957, - "linear_attention_total": 2359296, - "linear_dense_nnz": 30412, - "linear_dense_total": 4718592, - "linear_nnz": 46369, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 43981, - "linear_attention_total": 2359296, - "linear_dense_nnz": 351138, - "linear_dense_total": 4718592, - "linear_nnz": 395119, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 71058, - "linear_attention_total": 2359296, - "linear_dense_nnz": 323059, - "linear_dense_total": 4718592, - "linear_nnz": 394117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 47705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287668, - "linear_dense_total": 4718592, - "linear_nnz": 335373, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 40348, - "linear_attention_total": 2359296, - "linear_dense_nnz": 252178, - "linear_dense_total": 4718592, - "linear_nnz": 292526, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 33002, - "linear_attention_total": 2359296, - "linear_dense_nnz": 205112, - "linear_dense_total": 4718592, - "linear_nnz": 238114, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 38753, - "linear_attention_total": 2359296, - "linear_dense_nnz": 150138, - "linear_dense_total": 4718592, - "linear_nnz": 188891, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 22052, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101313, - "linear_dense_total": 4718592, - "linear_nnz": 123365, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 28498, - "linear_attention_total": 2359296, - "linear_dense_nnz": 35917, - "linear_dense_total": 4718592, - "linear_nnz": 64415, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 2895098, - "linear_sparsity": 96.59138196780358, - "linear_total": 84934656, - "nnz": 26853628, - "total": 108893186, - "total_sparsity": 75.33947808267818 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 88.07603620459668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.31425653076172, - "eval_elapsed_time": 42.675803440622985 - }, - "speedup": 1.092884200230921, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 79341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1362813, - "linear_dense_total": 4718592, - "linear_nnz": 1442154, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 146964, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1411011, - "linear_dense_total": 4718592, - "linear_nnz": 1557975, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 70746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 191871, - "linear_dense_total": 4718592, - "linear_nnz": 262617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 36271, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137408, - "linear_dense_total": 4718592, - "linear_nnz": 173679, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 173655, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1463754, - "linear_dense_total": 4718592, - "linear_nnz": 1637409, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 213353, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1442359, - "linear_dense_total": 4718592, - "linear_nnz": 1655712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 221518, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380230, - "linear_dense_total": 4718592, - "linear_nnz": 1601748, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 179373, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360274, - "linear_dense_total": 4718592, - "linear_nnz": 1539647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 168393, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1183896, - "linear_dense_total": 4718592, - "linear_nnz": 1352289, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 159612, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906603, - "linear_dense_total": 4718592, - "linear_nnz": 1066215, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 127230, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600693, - "linear_dense_total": 4718592, - "linear_nnz": 727923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 105257, - "linear_attention_total": 2359296, - "linear_dense_nnz": 285690, - "linear_dense_total": 4718592, - "linear_nnz": 390947, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13408315, - "linear_sparsity": 84.21337575088313, - "linear_total": 84934656, - "nnz": 37366845, - "total": 108893186, - "total_sparsity": 65.68486388119823 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.22705771050141, - "f1": 88.08154392563726 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.30916271209717, - "eval_elapsed_time": 42.719326278194785 - }, - "speedup": 1.0930418633843273, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 87221, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1434572, - "linear_dense_total": 4718592, - "linear_nnz": 1521793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 157517, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1480327, - "linear_dense_total": 4718592, - "linear_nnz": 1637844, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 75446, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204546, - "linear_dense_total": 4718592, - "linear_nnz": 279992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 38439, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144390, - "linear_dense_total": 4718592, - "linear_nnz": 182829, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 188172, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1535574, - "linear_dense_total": 4718592, - "linear_nnz": 1723746, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 230341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1512620, - "linear_dense_total": 4718592, - "linear_nnz": 1742961, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 240387, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1447041, - "linear_dense_total": 4718592, - "linear_nnz": 1687428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 195780, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427597, - "linear_dense_total": 4718592, - "linear_nnz": 1623377, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 184963, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245019, - "linear_dense_total": 4718592, - "linear_nnz": 1429982, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 172954, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957245, - "linear_dense_total": 4718592, - "linear_nnz": 1130199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 138133, - "linear_attention_total": 2359296, - "linear_dense_nnz": 635763, - "linear_dense_total": 4718592, - "linear_nnz": 773896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 112972, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304891, - "linear_dense_total": 4718592, - "linear_nnz": 417863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14151910, - "linear_sparsity": 83.3378850677867, - "linear_total": 84934656, - "nnz": 38110440, - "total": 108893186, - "total_sparsity": 65.00199746198996 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.89593188268685, - "f1": 87.64967103979136 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 32.98558323669434, - "eval_elapsed_time": 40.38167083170265 - }, - "speedup": 1.170038217254783, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56754, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1054479, - "linear_dense_total": 4718592, - "linear_nnz": 1111233, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 116764, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106103, - "linear_dense_total": 4718592, - "linear_nnz": 1222867, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50915, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121878, - "linear_dense_total": 4718592, - "linear_nnz": 172793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28303, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94314, - "linear_dense_total": 4718592, - "linear_nnz": 122617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 127558, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1136881, - "linear_dense_total": 4718592, - "linear_nnz": 1264439, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 163709, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106395, - "linear_dense_total": 4718592, - "linear_nnz": 1270104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 158018, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044282, - "linear_dense_total": 4718592, - "linear_nnz": 1202300, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 125746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1010449, - "linear_dense_total": 4718592, - "linear_nnz": 1136195, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 110023, - "linear_attention_total": 2359296, - "linear_dense_nnz": 861094, - "linear_dense_total": 4718592, - "linear_nnz": 971117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 113086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632989, - "linear_dense_total": 4718592, - "linear_nnz": 746075, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 81879, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407092, - "linear_dense_total": 4718592, - "linear_nnz": 488971, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 77365, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173330, - "linear_dense_total": 4718592, - "linear_nnz": 250695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9959406, - "linear_sparsity": 88.27403739646628, - "linear_total": 84934656, - "nnz": 33917936, - "total": 108893186, - "total_sparsity": 68.85210429971255 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.8391674550615, - "f1": 87.59923644792065 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 33.06226232147217, - "eval_elapsed_time": 40.42444095481187 - }, - "speedup": 1.1673246261888772, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044542, - "linear_dense_total": 4718592, - "linear_nnz": 1100628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 115328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096450, - "linear_dense_total": 4718592, - "linear_nnz": 1211778, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50374, - "linear_attention_total": 2359296, - "linear_dense_nnz": 120861, - "linear_dense_total": 4718592, - "linear_nnz": 171235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28038, - "linear_attention_total": 2359296, - "linear_dense_nnz": 93754, - "linear_dense_total": 4718592, - "linear_nnz": 121792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 125881, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1127188, - "linear_dense_total": 4718592, - "linear_nnz": 1253069, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 161525, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096986, - "linear_dense_total": 4718592, - "linear_nnz": 1258511, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 155911, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1035794, - "linear_dense_total": 4718592, - "linear_nnz": 1191705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 123921, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001507, - "linear_dense_total": 4718592, - "linear_nnz": 1125428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 108430, - "linear_attention_total": 2359296, - "linear_dense_nnz": 853489, - "linear_dense_total": 4718592, - "linear_nnz": 961919, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 111505, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627123, - "linear_dense_total": 4718592, - "linear_nnz": 738628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 80805, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403383, - "linear_dense_total": 4718592, - "linear_nnz": 484188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 76456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 171492, - "linear_dense_total": 4718592, - "linear_nnz": 247948, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9866829, - "linear_sparsity": 88.38303530657733, - "linear_total": 84934656, - "nnz": 33825359, - "total": 108893186, - "total_sparsity": 68.93712063856779 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.81362346263009, - "f1": 88.10463591853348 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.2810027923584, - "eval_elapsed_time": 34.61669071530923 - }, - "speedup": 1.4146984734806616, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4032512, - "linear_dense_total": 4718592, - "linear_nnz": 4676608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4172800, - "linear_dense_total": 4718592, - "linear_nnz": 4756480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 445440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 890880, - "linear_dense_total": 4718592, - "linear_nnz": 1336320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 272384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 875520, - "linear_dense_total": 4718592, - "linear_nnz": 1147904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4324352, - "linear_dense_total": 4718592, - "linear_nnz": 5113856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1028096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4392960, - "linear_dense_total": 4718592, - "linear_nnz": 5421056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1067008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4359168, - "linear_dense_total": 4718592, - "linear_nnz": 5426176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4333568, - "linear_dense_total": 4718592, - "linear_nnz": 5276672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1003520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4161536, - "linear_dense_total": 4718592, - "linear_nnz": 5165056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4797440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3021824, - "linear_dense_total": 4718592, - "linear_nnz": 3890176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 520192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48734208, - "linear_sparsity": 42.62152777777778, - "linear_total": 84934656, - "nnz": 72671586, - "total": 108893186, - "total_sparsity": 33.26342201062975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 87.95145431777735 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.25869842529297, - "eval_elapsed_time": 34.5833341376856 - }, - "speedup": 1.4158560472410484, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4140032, - "linear_dense_total": 4718592, - "linear_nnz": 4754432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4246528, - "linear_dense_total": 4718592, - "linear_nnz": 4843520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 881664, - "linear_dense_total": 4718592, - "linear_nnz": 1332224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 266240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863232, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 788480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4357120, - "linear_dense_total": 4718592, - "linear_nnz": 5145600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4426752, - "linear_dense_total": 4718592, - "linear_nnz": 5488640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4414464, - "linear_dense_total": 4718592, - "linear_nnz": 5463040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 918528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4399104, - "linear_dense_total": 4718592, - "linear_nnz": 5317632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 998400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4232192, - "linear_dense_total": 4718592, - "linear_nnz": 5230592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939328, - "linear_dense_total": 4718592, - "linear_nnz": 4838400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 819200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3028992, - "linear_dense_total": 4718592, - "linear_nnz": 3848192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1104896, - "linear_dense_total": 4718592, - "linear_nnz": 1620992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 49012736, - "linear_sparsity": 42.29359567901234, - "linear_total": 84934656, - "nnz": 72950082, - "total": 108893186, - "total_sparsity": 33.00767047076757 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.72847682119205, - "f1": 88.08831525592305 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.293812591552737, - "eval_elapsed_time": 34.635603360366076 - }, - "speedup": 1.4140345133503194, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4035584, - "linear_dense_total": 4718592, - "linear_nnz": 4657152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155392, - "linear_dense_total": 4718592, - "linear_nnz": 4759552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 486400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957440, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 891904, - "linear_dense_total": 4718592, - "linear_nnz": 1178624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4284416, - "linear_dense_total": 4718592, - "linear_nnz": 5065728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1068032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4340736, - "linear_dense_total": 4718592, - "linear_nnz": 5408768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1087488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4237312, - "linear_dense_total": 4718592, - "linear_nnz": 5324800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4281344, - "linear_dense_total": 4718592, - "linear_nnz": 5189632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1019904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4149248, - "linear_dense_total": 4718592, - "linear_nnz": 5169152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3827712, - "linear_dense_total": 4718592, - "linear_nnz": 4749312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 851968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3080192, - "linear_dense_total": 4718592, - "linear_nnz": 3932160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278976, - "linear_dense_total": 4718592, - "linear_nnz": 1808384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48687104, - "linear_sparsity": 42.67698688271605, - "linear_total": 84934656, - "nnz": 72624802, - "total": 108893186, - "total_sparsity": 33.306385213120684 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 87.91705961229685 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 26.4900548248291, - "eval_elapsed_time": 33.8130349079147 - }, - "speedup": 1.4569389629649467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3140608, - "linear_dense_total": 4718592, - "linear_nnz": 3775488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 602112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3477504, - "linear_dense_total": 4718592, - "linear_nnz": 4079616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 456704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 494592, - "linear_dense_total": 4718592, - "linear_nnz": 951296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 541696, - "linear_dense_total": 4718592, - "linear_nnz": 831488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1008640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3929088, - "linear_dense_total": 4718592, - "linear_nnz": 4937728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1197056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4089856, - "linear_dense_total": 4718592, - "linear_nnz": 5286912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1181696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3953664, - "linear_dense_total": 4718592, - "linear_nnz": 5135360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1005568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006912, - "linear_dense_total": 4718592, - "linear_nnz": 5012480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1043456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3677184, - "linear_dense_total": 4718592, - "linear_nnz": 4720640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 931840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2777088, - "linear_dense_total": 4718592, - "linear_nnz": 3708928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 862208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1448960, - "linear_dense_total": 4718592, - "linear_nnz": 2311168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 458752, - "linear_dense_total": 4718592, - "linear_nnz": 1058816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 41809920, - "linear_sparsity": 50.774016203703695, - "linear_total": 84934656, - "nnz": 65744386, - "total": 108893186, - "total_sparsity": 39.6248852522324 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.82024597918638, - "f1": 87.30735739624531 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.124949531555178, - "eval_elapsed_time": 31.406295038294047 - }, - "speedup": 1.599770932365684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 889856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2382848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 717824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1850368, - "linear_dense_total": 4718592, - "linear_nnz": 2568192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328704, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 331776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388096, - "linear_dense_total": 4718592, - "linear_nnz": 719872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2802688, - "linear_dense_total": 4718592, - "linear_nnz": 3915776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1297408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2961408, - "linear_dense_total": 4718592, - "linear_nnz": 4258816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1402880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2897920, - "linear_dense_total": 4718592, - "linear_nnz": 4300800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1157120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2873344, - "linear_dense_total": 4718592, - "linear_nnz": 4030464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2473984, - "linear_dense_total": 4718592, - "linear_nnz": 3661824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 979968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1527808, - "linear_dense_total": 4718592, - "linear_nnz": 2507776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 952320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 610304, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 642048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 223232, - "linear_dense_total": 4718592, - "linear_nnz": 865280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31592448, - "linear_sparsity": 62.80381944444444, - "linear_total": 84934656, - "nnz": 55520034, - "total": 108893186, - "total_sparsity": 49.0142257386059 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.90539262062441, - "f1": 87.36378709007766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.748493873596193, - "eval_elapsed_time": 32.03074289299548 - }, - "speedup": 1.559464313363606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635328, - "linear_dense_total": 4718592, - "linear_nnz": 2584576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 750592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2048000, - "linear_dense_total": 4718592, - "linear_nnz": 2798592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352256, - "linear_dense_total": 4718592, - "linear_nnz": 862208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 420864, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1123328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2895872, - "linear_dense_total": 4718592, - "linear_nnz": 4019200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1306624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2967552, - "linear_dense_total": 4718592, - "linear_nnz": 4274176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1475584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3105792, - "linear_dense_total": 4718592, - "linear_nnz": 4581376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1285120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2934784, - "linear_dense_total": 4718592, - "linear_nnz": 4219904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1235968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2500608, - "linear_dense_total": 4718592, - "linear_nnz": 3736576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 983040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1604608, - "linear_dense_total": 4718592, - "linear_nnz": 2587648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 661504, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 230400, - "linear_dense_total": 4718592, - "linear_nnz": 880640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 32956416, - "linear_sparsity": 61.19791666666667, - "linear_total": 84934656, - "nnz": 56885634, - "total": 108893186, - "total_sparsity": 47.76015277944021 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.29990539262063, - "f1": 87.09851869948527 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.736273986816407, - "eval_elapsed_time": 32.05209435708821 - }, - "speedup": 1.5602346992898202, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3907584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3661824, - "linear_dense_total": 4718592, - "linear_nnz": 4186112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 595968, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 823296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4031488, - "linear_dense_total": 4718592, - "linear_nnz": 4629504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 930816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4107264, - "linear_dense_total": 4718592, - "linear_nnz": 5038080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 824320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3997696, - "linear_dense_total": 4718592, - "linear_nnz": 4822016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 746496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4027392, - "linear_dense_total": 4718592, - "linear_nnz": 4773888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 670720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3737600, - "linear_dense_total": 4718592, - "linear_nnz": 4408320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 794624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2995200, - "linear_dense_total": 4718592, - "linear_nnz": 3789824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1756160, - "linear_dense_total": 4718592, - "linear_nnz": 2176000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600064, - "linear_dense_total": 4718592, - "linear_nnz": 1011712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40469504, - "linear_sparsity": 52.35218942901234, - "linear_total": 84934656, - "nnz": 64400930, - "total": 108893186, - "total_sparsity": 40.85862268737366 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.22421948912014, - "f1": 87.0664817371684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.675214500427245, - "eval_elapsed_time": 31.986000607255846 - }, - "speedup": 1.5640955422982379, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 501760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 528384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3656704, - "linear_dense_total": 4718592, - "linear_nnz": 4185088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 561152, - "linear_dense_total": 4718592, - "linear_nnz": 874496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 617472, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4121600, - "linear_dense_total": 4718592, - "linear_nnz": 4703232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4144128, - "linear_dense_total": 4718592, - "linear_nnz": 5060608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 833536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4060160, - "linear_dense_total": 4718592, - "linear_nnz": 4893696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 741376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076544, - "linear_dense_total": 4718592, - "linear_nnz": 4817920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3815424, - "linear_dense_total": 4718592, - "linear_nnz": 4459520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 757760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2962432, - "linear_dense_total": 4718592, - "linear_nnz": 3720192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 380928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689600, - "linear_dense_total": 4718592, - "linear_nnz": 2070528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 966656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40452096, - "linear_sparsity": 52.37268518518518, - "linear_total": 84934656, - "nnz": 64383586, - "total": 108893186, - "total_sparsity": 40.874550222086434 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.67833491012298, - "f1": 87.14623278516426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 25.61453672027588, - "eval_elapsed_time": 32.96429116372019 - }, - "speedup": 1.5067378897710322, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 571392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3765248, - "linear_dense_total": 4718592, - "linear_nnz": 4336640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3852288, - "linear_dense_total": 4718592, - "linear_nnz": 4451328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 672768, - "linear_dense_total": 4718592, - "linear_nnz": 1047552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 235520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 942080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 695296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4087808, - "linear_dense_total": 4718592, - "linear_nnz": 4783104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 996352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4050944, - "linear_dense_total": 4718592, - "linear_nnz": 5047296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 923648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4109312, - "linear_dense_total": 4718592, - "linear_nnz": 5032960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 865280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4041728, - "linear_dense_total": 4718592, - "linear_nnz": 4907008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 778240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3858432, - "linear_dense_total": 4718592, - "linear_nnz": 4636672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 883712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3359744, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2305024, - "linear_dense_total": 4718592, - "linear_nnz": 2818048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 462848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1289216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43535360, - "linear_sparsity": 48.742525077160494, - "linear_total": 84934656, - "nnz": 67469538, - "total": 108893186, - "total_sparsity": 38.04062450702838 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.94985808893094, - "f1": 86.768721062838 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 21.874919250488283, - "eval_elapsed_time": 29.121937923133373 - }, - "speedup": 1.7643216216448254, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1844224, - "linear_dense_total": 4718592, - "linear_nnz": 2392064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2172928, - "linear_dense_total": 4718592, - "linear_nnz": 2719744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 392192, - "linear_dense_total": 4718592, - "linear_nnz": 748544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434176, - "linear_dense_total": 4718592, - "linear_nnz": 651264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3196928, - "linear_dense_total": 4718592, - "linear_nnz": 3872768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3111936, - "linear_dense_total": 4718592, - "linear_nnz": 4077568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 896000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3107840, - "linear_dense_total": 4718592, - "linear_nnz": 4003840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3136512, - "linear_dense_total": 4718592, - "linear_nnz": 3832832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2525184, - "linear_dense_total": 4718592, - "linear_nnz": 3280896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 799744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1711104, - "linear_dense_total": 4718592, - "linear_nnz": 2510848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 747520, - "linear_dense_total": 4718592, - "linear_nnz": 1257472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 420864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 681984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30029824, - "linear_sparsity": 64.6436149691358, - "linear_total": 84934656, - "nnz": 53955042, - "total": 108893186, - "total_sparsity": 50.45140657377771 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.8713339640492, - "f1": 85.84893170709621 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.24458102798462, - "eval_elapsed_time": 26.45731420116499 - }, - "speedup": 2.0054680821187447, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 647168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 789504, - "linear_dense_total": 4718592, - "linear_nnz": 1436672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 591872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1798144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 263168, - "linear_dense_total": 4718592, - "linear_nnz": 622592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 271360, - "linear_dense_total": 4718592, - "linear_nnz": 512000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 843776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1739776, - "linear_dense_total": 4718592, - "linear_nnz": 2583552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1118208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1857536, - "linear_dense_total": 4718592, - "linear_nnz": 2975744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1760256, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 791552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718272, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1330176, - "linear_dense_total": 4718592, - "linear_nnz": 2085888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904192, - "linear_dense_total": 4718592, - "linear_nnz": 1731584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 726016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 257024, - "linear_dense_total": 4718592, - "linear_nnz": 983040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 464896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118784, - "linear_dense_total": 4718592, - "linear_nnz": 583680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20496384, - "linear_sparsity": 75.86805555555556, - "linear_total": 84934656, - "nnz": 44413282, - "total": 108893186, - "total_sparsity": 59.21390159343854 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.92809839167455, - "f1": 85.97854187426412 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.635457836151122, - "eval_elapsed_time": 26.92565976222977 - }, - "speedup": 1.9655458674518098, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 679936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 869376, - "linear_dense_total": 4718592, - "linear_nnz": 1549312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269760, - "linear_dense_total": 4718592, - "linear_nnz": 1868800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 379904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 282624, - "linear_dense_total": 4718592, - "linear_nnz": 662528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290816, - "linear_dense_total": 4718592, - "linear_nnz": 548864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 875520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1863680, - "linear_dense_total": 4718592, - "linear_nnz": 2739200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1137664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1950720, - "linear_dense_total": 4718592, - "linear_nnz": 3088384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1033216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1787904, - "linear_dense_total": 4718592, - "linear_nnz": 2821120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 850944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1858560, - "linear_dense_total": 4718592, - "linear_nnz": 2709504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 798720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1426432, - "linear_dense_total": 4718592, - "linear_nnz": 2225152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 878592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987136, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 782336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 267264, - "linear_dense_total": 4718592, - "linear_nnz": 1049600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 649216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21777408, - "linear_sparsity": 74.35980902777779, - "linear_total": 84934656, - "nnz": 45695714, - "total": 108893186, - "total_sparsity": 58.036204395746125 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.79280983916746, - "f1": 85.3167029862563 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.872496753692626, - "eval_elapsed_time": 24.01387820020318 - }, - "speedup": 2.2874144573134694, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 512000, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 551936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 197632, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 197632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 220160, - "linear_dense_total": 4718592, - "linear_nnz": 417792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 722944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1211392, - "linear_dense_total": 4718592, - "linear_nnz": 1934336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 954368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1397760, - "linear_dense_total": 4718592, - "linear_nnz": 2352128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1238016, - "linear_dense_total": 4718592, - "linear_nnz": 2028544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 584704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1295360, - "linear_dense_total": 4718592, - "linear_nnz": 1880064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 608256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1018880, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 576512, - "linear_dense_total": 4718592, - "linear_nnz": 1316864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 510976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 673792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94208, - "linear_dense_total": 4718592, - "linear_nnz": 451584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15444992, - "linear_sparsity": 81.81544174382715, - "linear_total": 84934656, - "nnz": 39356610, - "total": 108893186, - "total_sparsity": 63.85760078688487 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.04824976348155, - "f1": 85.17930403802184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.85802384185791, - "eval_elapsed_time": 24.0219326200895 - }, - "speedup": 2.289378243109522, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 519168, - "linear_dense_total": 4718592, - "linear_nnz": 1032192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 692224, - "linear_dense_total": 4718592, - "linear_nnz": 1215488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 206848, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 186368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 401408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 683008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1239040, - "linear_dense_total": 4718592, - "linear_nnz": 1922048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 945152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1374208, - "linear_dense_total": 4718592, - "linear_nnz": 2319360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1235968, - "linear_dense_total": 4718592, - "linear_nnz": 2045952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 1847296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1607680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 708608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 578560, - "linear_dense_total": 4718592, - "linear_nnz": 1287168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 473088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158720, - "linear_dense_total": 4718592, - "linear_nnz": 631808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 352256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 90112, - "linear_dense_total": 4718592, - "linear_nnz": 442368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15271936, - "linear_sparsity": 82.0191936728395, - "linear_total": 84934656, - "nnz": 39183362, - "total": 108893186, - "total_sparsity": 64.01669981444019 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.70104068117313, - "f1": 85.88451743537976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 20.68525614929199, - "eval_elapsed_time": 27.97377561684698 - }, - "speedup": 1.8657923656745288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2119680, - "linear_dense_total": 4718592, - "linear_nnz": 2533376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2476032, - "linear_dense_total": 4718592, - "linear_nnz": 2840576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386048, - "linear_dense_total": 4718592, - "linear_nnz": 623616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 460800, - "linear_dense_total": 4718592, - "linear_nnz": 605184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3445760, - "linear_dense_total": 4718592, - "linear_nnz": 3843072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 666624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3402752, - "linear_dense_total": 4718592, - "linear_nnz": 4069376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 492544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3339264, - "linear_dense_total": 4718592, - "linear_nnz": 3831808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194880, - "linear_dense_total": 4718592, - "linear_nnz": 3714048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 448512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2751488, - "linear_dense_total": 4718592, - "linear_nnz": 3200000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 576512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1839104, - "linear_dense_total": 4718592, - "linear_nnz": 2415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1211392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 302080, - "linear_dense_total": 4718592, - "linear_nnz": 619520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29507584, - "linear_sparsity": 65.25848765432099, - "linear_total": 84934656, - "nnz": 53430466, - "total": 108893186, - "total_sparsity": 50.93314103235074 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.68211920529801, - "f1": 86.11161494070976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.577418830871583, - "eval_elapsed_time": 28.903804030269384 - }, - "speedup": 1.7886473497076825, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2385920, - "linear_dense_total": 4718592, - "linear_nnz": 2845696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2798592, - "linear_dense_total": 4718592, - "linear_nnz": 3173376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 254976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416768, - "linear_dense_total": 4718592, - "linear_nnz": 671744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 165888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 466944, - "linear_dense_total": 4718592, - "linear_nnz": 632832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3454976, - "linear_dense_total": 4718592, - "linear_nnz": 3866624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3496960, - "linear_dense_total": 4718592, - "linear_nnz": 4224000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 541696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3412992, - "linear_dense_total": 4718592, - "linear_nnz": 3954688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 545792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3447808, - "linear_dense_total": 4718592, - "linear_nnz": 3993600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933760, - "linear_dense_total": 4718592, - "linear_nnz": 3427328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 641024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2000896, - "linear_dense_total": 4718592, - "linear_nnz": 2641920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 288768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1004544, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 339968, - "linear_dense_total": 4718592, - "linear_nnz": 678912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31404032, - "linear_sparsity": 63.025655864197525, - "linear_total": 84934656, - "nnz": 55329122, - "total": 108893186, - "total_sparsity": 49.1895461668281 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.96594134342479, - "f1": 86.01491496793933 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.28239717102051, - "eval_elapsed_time": 28.641465611290187 - }, - "speedup": 1.8134420053923117, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 435200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2226176, - "linear_dense_total": 4718592, - "linear_nnz": 2661376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2727936, - "linear_dense_total": 4718592, - "linear_nnz": 3087360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 252928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 664576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 158720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 487424, - "linear_dense_total": 4718592, - "linear_nnz": 646144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 421888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3473408, - "linear_dense_total": 4718592, - "linear_nnz": 3895296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 710656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3451904, - "linear_dense_total": 4718592, - "linear_nnz": 4162560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3437568, - "linear_dense_total": 4718592, - "linear_nnz": 3985408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 556032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3325952, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2828288, - "linear_dense_total": 4718592, - "linear_nnz": 3340288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1991680, - "linear_dense_total": 4718592, - "linear_nnz": 2614272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 276480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 979968, - "linear_dense_total": 4718592, - "linear_nnz": 1256448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 337920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330752, - "linear_dense_total": 4718592, - "linear_nnz": 668672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30864384, - "linear_sparsity": 63.66102430555556, - "linear_total": 84934656, - "nnz": 54788706, - "total": 108893186, - "total_sparsity": 49.68582699012958 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.92526017029329, - "f1": 85.21713644985097 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.63341423416138, - "eval_elapsed_time": 24.82955563813448 - }, - "speedup": 2.1887078981336363, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1469440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 396288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1296384, - "linear_dense_total": 4718592, - "linear_nnz": 1692672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 308224, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 152576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 315392, - "linear_dense_total": 4718592, - "linear_nnz": 467968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2113536, - "linear_dense_total": 4718592, - "linear_nnz": 2692096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1973248, - "linear_dense_total": 4718592, - "linear_nnz": 2728960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 565248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966080, - "linear_dense_total": 4718592, - "linear_nnz": 2531328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1887232, - "linear_dense_total": 4718592, - "linear_nnz": 2434048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 476160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1502208, - "linear_dense_total": 4718592, - "linear_nnz": 1978368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000448, - "linear_dense_total": 4718592, - "linear_nnz": 1638400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 310272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 310272, - "linear_dense_total": 4718592, - "linear_nnz": 620544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 457728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19257344, - "linear_sparsity": 77.3268711419753, - "linear_total": 84934656, - "nnz": 43172098, - "total": 108893186, - "total_sparsity": 60.35371946964616 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.08609271523179, - "f1": 85.20287591064626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.564620765686037, - "eval_elapsed_time": 24.740368818864226 - }, - "speedup": 2.1972801758844964, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1463296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 399360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1300480, - "linear_dense_total": 4718592, - "linear_nnz": 1699840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305152, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 329728, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 544768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2180096, - "linear_dense_total": 4718592, - "linear_nnz": 2724864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 731136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1939456, - "linear_dense_total": 4718592, - "linear_nnz": 2670592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 557056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1941504, - "linear_dense_total": 4718592, - "linear_nnz": 2498560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1880064, - "linear_dense_total": 4718592, - "linear_nnz": 2407424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 472064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1456128, - "linear_dense_total": 4718592, - "linear_nnz": 1928192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 607232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 977920, - "linear_dense_total": 4718592, - "linear_nnz": 1585152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 317440, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 308224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 455680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19060736, - "linear_sparsity": 77.55835262345678, - "linear_total": 84934656, - "nnz": 42975330, - "total": 108893186, - "total_sparsity": 60.53441764482857 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.01986754966887, - "f1": 85.2617013700351 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 18.277880432128907, - "eval_elapsed_time": 25.53750513214618 - }, - "speedup": 2.1115354785629177, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1234944, - "linear_dense_total": 4718592, - "linear_nnz": 1715200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 400384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495040, - "linear_dense_total": 4718592, - "linear_nnz": 1895424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 267264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 593920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 163840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 594944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2417664, - "linear_dense_total": 4718592, - "linear_nnz": 3012608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 813056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281472, - "linear_dense_total": 4718592, - "linear_nnz": 3094528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2163712, - "linear_dense_total": 4718592, - "linear_nnz": 2762752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 562176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2145280, - "linear_dense_total": 4718592, - "linear_nnz": 2707456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 531456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2233344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 678912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1062912, - "linear_dense_total": 4718592, - "linear_nnz": 1741824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370688, - "linear_dense_total": 4718592, - "linear_nnz": 709632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164864, - "linear_dense_total": 4718592, - "linear_nnz": 524288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21492736, - "linear_sparsity": 74.6949749228395, - "linear_total": 84934656, - "nnz": 45409666, - "total": 108893186, - "total_sparsity": 58.29889117212532 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.22056943761015 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.92396342086792, - "eval_elapsed_time": 25.119796799961478 - }, - "speedup": 2.153228730674472, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 458752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1139712, - "linear_dense_total": 4718592, - "linear_nnz": 1598464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 398336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427456, - "linear_dense_total": 4718592, - "linear_nnz": 1825792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 598016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 347136, - "linear_dense_total": 4718592, - "linear_nnz": 509952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2257920, - "linear_dense_total": 4718592, - "linear_nnz": 2854912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2123776, - "linear_dense_total": 4718592, - "linear_nnz": 2905088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 620544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2023424, - "linear_dense_total": 4718592, - "linear_nnz": 2643968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 573440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970176, - "linear_dense_total": 4718592, - "linear_nnz": 2543616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 460800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1588224, - "linear_dense_total": 4718592, - "linear_nnz": 2049024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1069056, - "linear_dense_total": 4718592, - "linear_nnz": 1708032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 359424, - "linear_dense_total": 4718592, - "linear_nnz": 666624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 327680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 161792, - "linear_dense_total": 4718592, - "linear_nnz": 489472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20392960, - "linear_sparsity": 75.98982445987654, - "linear_total": 84934656, - "nnz": 44308674, - "total": 108893186, - "total_sparsity": 59.309966373837206 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.51371807000946, - "f1": 88.67903677006836 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.30978426361084, - "eval_elapsed_time": 38.71227815328166 - }, - "speedup": 1.232662374177603, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 804864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5262336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 771072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4464640, - "linear_dense_total": 4718592, - "linear_nnz": 5235712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607680, - "linear_dense_total": 4718592, - "linear_nnz": 2222080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 389120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1230848, - "linear_dense_total": 4718592, - "linear_nnz": 1619968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1152000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4517888, - "linear_dense_total": 4718592, - "linear_nnz": 5669888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1312768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4562944, - "linear_dense_total": 4718592, - "linear_nnz": 5875712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1501184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4555776, - "linear_dense_total": 4718592, - "linear_nnz": 6056960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1377280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4520960, - "linear_dense_total": 4718592, - "linear_nnz": 5898240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1357824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4473856, - "linear_dense_total": 4718592, - "linear_nnz": 5831680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1192960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4290560, - "linear_dense_total": 4718592, - "linear_nnz": 5483520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4958208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 718848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2518016, - "linear_dense_total": 4718592, - "linear_nnz": 3236864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57351168, - "linear_sparsity": 32.47612847222222, - "linear_total": 84934656, - "nnz": 81295202, - "total": 108893186, - "total_sparsity": 25.344087186502197 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.73698799207777 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.817585739135744, - "eval_elapsed_time": 39.2419764213264 - }, - "speedup": 1.2129893613486789, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4316160, - "linear_dense_total": 4718592, - "linear_nnz": 5237760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 829440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5140480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 671744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2001920, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 409600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304576, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1221632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4419584, - "linear_dense_total": 4718592, - "linear_nnz": 5641216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1386496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4429824, - "linear_dense_total": 4718592, - "linear_nnz": 5816320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1540096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5997568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1548288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4420608, - "linear_dense_total": 4718592, - "linear_nnz": 5968896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1364992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4320256, - "linear_dense_total": 4718592, - "linear_nnz": 5685248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1272832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4186112, - "linear_dense_total": 4718592, - "linear_nnz": 5458944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1173504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3787776, - "linear_dense_total": 4718592, - "linear_nnz": 4961280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2839552, - "linear_dense_total": 4718592, - "linear_nnz": 3566592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57862144, - "linear_sparsity": 31.87451774691358, - "linear_total": 84934656, - "nnz": 81807426, - "total": 108893186, - "total_sparsity": 24.873695953757846 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.66887417218543, - "f1": 87.3881230572442 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.326403350830077, - "eval_elapsed_time": 24.523588876239955 - }, - "speedup": 2.227490161916501, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 643072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1539584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 463872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 576000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 591872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1051648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2068480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1257472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2334208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1315840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 2473984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2078208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 1820160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 925696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 1555456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 663040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18215424, - "linear_sparsity": 78.55360243055556, - "linear_total": 84934656, - "nnz": 42128141, - "total": 108893186, - "total_sparsity": 61.31241765669342 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.14755939306319 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.057066314697266, - "eval_elapsed_time": 24.182081679347903 - }, - "speedup": 2.262663009764823, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1107968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 478208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 551936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1098752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 1717760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1309696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 1967104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1362944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2067968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1074176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 1742336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1049600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 1565696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 958464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1342464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1153536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 729088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15622656, - "linear_sparsity": 81.6062644675926, - "linear_total": 84934656, - "nnz": 39533983, - "total": 108893186, - "total_sparsity": 63.694713643514845 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.67549668874172, - "f1": 86.51098653495667 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.252509830474853, - "eval_elapsed_time": 24.480217491276562 - }, - "speedup": 2.2370306340702912, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 864256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 991744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 216576, - "linear_dense_total": 4718592, - "linear_nnz": 965120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 502784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 53760, - "linear_dense_total": 4718592, - "linear_nnz": 556544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 360448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 81408, - "linear_dense_total": 4718592, - "linear_nnz": 441856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 324096, - "linear_dense_total": 4718592, - "linear_nnz": 1487360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1389568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377856, - "linear_dense_total": 4718592, - "linear_nnz": 1767424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1449984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 1864704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1349632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 364032, - "linear_dense_total": 4718592, - "linear_nnz": 1713664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1481216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 964608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 225792, - "linear_dense_total": 4718592, - "linear_nnz": 1190400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1063936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 1191424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 708608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14360064, - "linear_sparsity": 83.0928096064815, - "linear_total": 84934656, - "nnz": 38271273, - "total": 108893186, - "total_sparsity": 64.85429951512302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.3349101229896, - "f1": 86.4116267700138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.760263885498047, - "eval_elapsed_time": 21.897933847736567 - }, - "speedup": 2.6147495264830645, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 522240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 933888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1116160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 222208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 421888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1374720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1692160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 825344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1659392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 672768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1207296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 785408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1235456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 514048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12816896, - "linear_sparsity": 84.9096981095679, - "linear_total": 84934656, - "nnz": 36724619, - "total": 108893186, - "total_sparsity": 66.2746399944621 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.37275307473983, - "f1": 86.39441106336629 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.746898548126222, - "eval_elapsed_time": 21.86237431317568 - }, - "speedup": 2.61711931355729, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 930816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 536576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 443904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 226304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 425984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 667648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1366528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 967680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1681920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1669632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 668672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1412096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1221632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1237504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 757760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12803584, - "linear_sparsity": 84.92537133487654, - "linear_total": 84934656, - "nnz": 36711275, - "total": 108893186, - "total_sparsity": 66.28689420474849 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.8240302743614, - "f1": 86.11992485005756 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.268565601348877, - "eval_elapsed_time": 21.374552259687334 - }, - "speedup": 2.704854439028025, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 732160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 535552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 835072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 422912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 239616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 336384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1128960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1111040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 1551872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 892928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1389056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 663552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 662528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1000448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 801792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 803328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 498688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10866176, - "linear_sparsity": 87.20642843364197, - "linear_total": 84934656, - "nnz": 34772839, - "total": 108893186, - "total_sparsity": 68.06702028169144 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.9914853358562, - "f1": 85.26341062121247 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.846498733520509, - "eval_elapsed_time": 21.962527931667864 - }, - "speedup": 2.599561936999493, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 674816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 129024, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36864, - "linear_dense_total": 4718592, - "linear_nnz": 432128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 238592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 52224, - "linear_dense_total": 4718592, - "linear_nnz": 290816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 937984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 1137664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1193984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1458176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1057792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 278016, - "linear_dense_total": 4718592, - "linear_nnz": 1335808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 843264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 188928, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 830464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 979456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 753664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 79872, - "linear_dense_total": 4718592, - "linear_nnz": 833536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 46080, - "linear_dense_total": 4718592, - "linear_nnz": 478208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10163200, - "linear_sparsity": 88.03409529320987, - "linear_total": 84934656, - "nnz": 34069864, - "total": 108893186, - "total_sparsity": 68.71258409134985 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-25--2021-01-23--20-20-19/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.06054872280038, - "f1": 86.20063710644014 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.394198833465577, - "eval_elapsed_time": 21.72890411503613 - }, - "speedup": 2.681246344578876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 597504, - "linear_dense_total": 4718592, - "linear_nnz": 1053184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 854016, - "linear_dense_total": 4718592, - "linear_nnz": 1218560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118272, - "linear_dense_total": 4718592, - "linear_nnz": 404992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276480, - "linear_dense_total": 4718592, - "linear_nnz": 439296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1503232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 964608, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1047552, - "linear_dense_total": 4718592, - "linear_nnz": 1626112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 992256, - "linear_dense_total": 4718592, - "linear_nnz": 1592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 775680, - "linear_dense_total": 4718592, - "linear_nnz": 1322496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615936, - "linear_dense_total": 4718592, - "linear_nnz": 1302016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 342528, - "linear_dense_total": 4718592, - "linear_nnz": 678400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 358400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 493568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13348352, - "linear_sparsity": 84.28397472993827, - "linear_total": 84934656, - "nnz": 37255475, - "total": 108893186, - "total_sparsity": 65.78713841653968 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.11447492904446, - "f1": 85.59611837921153 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.499527885437011, - "eval_elapsed_time": 20.856850353069603 - }, - "speedup": 2.8589439077351635, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 722432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 309248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 382976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 198656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 352256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1136128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 779264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1362944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 575488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1240576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 590848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1054720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1066496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 388096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 623104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 452608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10498048, - "linear_sparsity": 87.63985339506173, - "linear_total": 84934656, - "nnz": 34403512, - "total": 108893186, - "total_sparsity": 68.40618475429675 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.37937559129612, - "f1": 85.69020560735045 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.492438529968261, - "eval_elapsed_time": 20.86975116888061 - }, - "speedup": 2.860446087610368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 741888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 495616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 954880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 296960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 370688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 194560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 348160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1141248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1373184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 582656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1247744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1163264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1042432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 715776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 375808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 610816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 347136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10516480, - "linear_sparsity": 87.61815200617285, - "linear_total": 84934656, - "nnz": 34421912, - "total": 108893186, - "total_sparsity": 68.3892874619354 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 88.07285498416482 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.669778549194337, - "eval_elapsed_time": 27.982159624807537 - }, - "speedup": 1.8671894773093938, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 991232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2098688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 730112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 624640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 787456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 897536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1225728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2731008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1433600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2977280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1566720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3176448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3081216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2487808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1166336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1148928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1617408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 738304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 945664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24807424, - "linear_sparsity": 70.79234182098766, - "linear_total": 84934656, - "nnz": 48725622, - "total": 108893186, - "total_sparsity": 55.25374562922606 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.70009460737937, - "f1": 88.04831949879843 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.71169916152954, - "eval_elapsed_time": 28.054355942178518 - }, - "speedup": 1.863410273796239, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2086400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 1995264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 615424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 403456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 881152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1232896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2738176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1455104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2998784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1598464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3208192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3104768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1373184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1165312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2002432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1631744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 947712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24871936, - "linear_sparsity": 70.71638695987654, - "linear_total": 84934656, - "nnz": 48790134, - "total": 108893186, - "total_sparsity": 55.19450225287742 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.79470198675497, - "f1": 88.10958975740277 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.632953029632567, - "eval_elapsed_time": 27.97396031860262 - }, - "speedup": 1.8705220212512832, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 976896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 733184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2018816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 805888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 904192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1252352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 2776064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1437696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 2993664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1545216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3162624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1574912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3089408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1370112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2505216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2026496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1190912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 1665536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 957440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25008128, - "linear_sparsity": 70.55603780864197, - "linear_total": 84934656, - "nnz": 48926434, - "total": 108893186, - "total_sparsity": 55.069333723048565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.58656575212866, - "f1": 88.06903108265608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.595643711090087, - "eval_elapsed_time": 26.718373194802552 - }, - "speedup": 1.9695394330694393, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1055744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1582592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 652288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 682496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1316864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2190848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1468416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1651712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 2697728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1616896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2603008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2102272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1265664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 1824768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1212416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 863232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20786688, - "linear_sparsity": 75.52625868055556, - "linear_total": 84934656, - "nnz": 44702229, - "total": 108893186, - "total_sparsity": 58.94855257518133 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.10406811731315, - "f1": 87.56487698206614 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.459814723968506, - "eval_elapsed_time": 26.6199238197878 - }, - "speedup": 1.9832867657180042, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1210368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 210432, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403968, - "linear_dense_total": 4718592, - "linear_nnz": 1381888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 712704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 69120, - "linear_dense_total": 4718592, - "linear_nnz": 781824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 136704, - "linear_dense_total": 4718592, - "linear_nnz": 580096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1500160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 513024, - "linear_dense_total": 4718592, - "linear_nnz": 2013184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1526784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 2115072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1734656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660480, - "linear_dense_total": 4718592, - "linear_nnz": 2395136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1659904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 551424, - "linear_dense_total": 4718592, - "linear_nnz": 2211328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1486848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 456192, - "linear_dense_total": 4718592, - "linear_nnz": 1943040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1254400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 336384, - "linear_dense_total": 4718592, - "linear_nnz": 1590784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1267712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173568, - "linear_dense_total": 4718592, - "linear_nnz": 1441280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 760832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 837632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18712064, - "linear_sparsity": 77.96887056327161, - "linear_total": 84934656, - "nnz": 42626625, - "total": 108893186, - "total_sparsity": 60.85464429335368 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l10-dl1--2021-01-24--15-45-00/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.00946073793756, - "f1": 87.65780769915727 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.317300163269042, - "eval_elapsed_time": 33.56822411296889 - }, - "speedup": 1.4665027478478643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1657600, - "linear_dense_total": 4718592, - "linear_nnz": 2378496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 719872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2046464, - "linear_dense_total": 4718592, - "linear_nnz": 2766336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 272128, - "linear_dense_total": 4718592, - "linear_nnz": 722688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 307456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 619264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1058304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2721792, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1227776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2707200, - "linear_dense_total": 4718592, - "linear_nnz": 3934976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1367808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2789888, - "linear_dense_total": 4718592, - "linear_nnz": 4157696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1258240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2672384, - "linear_dense_total": 4718592, - "linear_nnz": 3930624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1130496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2136064, - "linear_dense_total": 4718592, - "linear_nnz": 3266560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491200, - "linear_dense_total": 4718592, - "linear_nnz": 2480128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 888576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 653568, - "linear_dense_total": 4718592, - "linear_nnz": 1542144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 567296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 249088, - "linear_dense_total": 4718592, - "linear_nnz": 816384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30395392, - "linear_sparsity": 64.21320408950618, - "linear_total": 84934656, - "nnz": 54326914, - "total": 108893186, - "total_sparsity": 50.10990494850615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l10-dl1--2021-01-24--15-47-42/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.9271523178808, - "f1": 88.21768668110452 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 33.44704815673828, - "eval_elapsed_time": 40.718972705770284 - }, - "speedup": 1.1538953400165994, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 528912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2409360, - "linear_dense_total": 4718592, - "linear_nnz": 2938272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 618448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2534112, - "linear_dense_total": 4718592, - "linear_nnz": 3152560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 357616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 309216, - "linear_dense_total": 4718592, - "linear_nnz": 666832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 219536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276672, - "linear_dense_total": 4718592, - "linear_nnz": 496208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 835904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670704, - "linear_dense_total": 4718592, - "linear_nnz": 3506608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 958400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670800, - "linear_dense_total": 4718592, - "linear_nnz": 3629200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1091248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2620432, - "linear_dense_total": 4718592, - "linear_nnz": 3711680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1029984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2535968, - "linear_dense_total": 4718592, - "linear_nnz": 3565952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 964544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2286960, - "linear_dense_total": 4718592, - "linear_nnz": 3251504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 813552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1727488, - "linear_dense_total": 4718592, - "linear_nnz": 2541040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 744336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096768, - "linear_dense_total": 4718592, - "linear_nnz": 1841104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386800, - "linear_dense_total": 4718592, - "linear_nnz": 860464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30161424, - "linear_sparsity": 64.48867232711225, - "linear_total": 84934656, - "nnz": 54106194, - "total": 108893186, - "total_sparsity": 50.31259899035372 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.72563859981078, - "f1": 87.37325813950282 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.62903995513916, - "eval_elapsed_time": 37.18844554480165 - }, - "speedup": 1.302586687378539, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492736, - "linear_dense_total": 4718592, - "linear_nnz": 1861936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640864, - "linear_dense_total": 4718592, - "linear_nnz": 2108384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173136, - "linear_dense_total": 4718592, - "linear_nnz": 415888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168992, - "linear_dense_total": 4718592, - "linear_nnz": 326432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 642896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1775952, - "linear_dense_total": 4718592, - "linear_nnz": 2418848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758400, - "linear_dense_total": 4718592, - "linear_nnz": 2503152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1673184, - "linear_dense_total": 4718592, - "linear_nnz": 2447312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581904, - "linear_dense_total": 4718592, - "linear_nnz": 2218640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321456, - "linear_dense_total": 4718592, - "linear_nnz": 1927200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1454768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524512, - "linear_dense_total": 4718592, - "linear_nnz": 1011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 307184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180400, - "linear_dense_total": 4718592, - "linear_nnz": 487584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19181376, - "linear_sparsity": 77.41631401909721, - "linear_total": 84934656, - "nnz": 43119238, - "total": 108893186, - "total_sparsity": 60.40226245194075 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.77294228949859, - "f1": 87.35885990249378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.662232711791994, - "eval_elapsed_time": 37.211166836321354 - }, - "speedup": 1.3011290613342195, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492400, - "linear_dense_total": 4718592, - "linear_nnz": 1861424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640528, - "linear_dense_total": 4718592, - "linear_nnz": 2107600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173264, - "linear_dense_total": 4718592, - "linear_nnz": 415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168800, - "linear_dense_total": 4718592, - "linear_nnz": 326080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 643248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1776032, - "linear_dense_total": 4718592, - "linear_nnz": 2419280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758000, - "linear_dense_total": 4718592, - "linear_nnz": 2502560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 773760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1672784, - "linear_dense_total": 4718592, - "linear_nnz": 2446544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581568, - "linear_dense_total": 4718592, - "linear_nnz": 2217776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321040, - "linear_dense_total": 4718592, - "linear_nnz": 1926704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906384, - "linear_dense_total": 4718592, - "linear_nnz": 1454544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524352, - "linear_dense_total": 4718592, - "linear_nnz": 1010816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 306864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180544, - "linear_dense_total": 4718592, - "linear_nnz": 487408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19176352, - "linear_sparsity": 77.42222915461035, - "linear_total": 84934656, - "nnz": 43114218, - "total": 108893186, - "total_sparsity": 60.40687247409585 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.84295175023652, - "f1": 85.93146728512978 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.637864067077636, - "eval_elapsed_time": 32.05906807305291 - }, - "speedup": 1.5664666750452154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 246400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 777312, - "linear_dense_total": 4718592, - "linear_nnz": 1023712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904544, - "linear_dense_total": 4718592, - "linear_nnz": 1286416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 169216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118720, - "linear_dense_total": 4718592, - "linear_nnz": 287936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110384, - "linear_dense_total": 4718592, - "linear_nnz": 224368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1024768, - "linear_dense_total": 4718592, - "linear_nnz": 1485456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 556080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006160, - "linear_dense_total": 4718592, - "linear_nnz": 1562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 487760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 920208, - "linear_dense_total": 4718592, - "linear_nnz": 1407968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 403424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859936, - "linear_dense_total": 4718592, - "linear_nnz": 1263360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 380560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 679056, - "linear_dense_total": 4718592, - "linear_nnz": 1059616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 400704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463040, - "linear_dense_total": 4718592, - "linear_nnz": 863744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 266832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283984, - "linear_dense_total": 4718592, - "linear_nnz": 550816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 225120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 102560, - "linear_dense_total": 4718592, - "linear_nnz": 327680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11343312, - "linear_sparsity": 86.64466010199654, - "linear_total": 84934656, - "nnz": 35270510, - "total": 108893186, - "total_sparsity": 67.60999352154138 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.47398297067171, - "f1": 85.88482767255138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.631753623962403, - "eval_elapsed_time": 32.0392144843936 - }, - "speedup": 1.5668552712310941, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 761056, - "linear_dense_total": 4718592, - "linear_nnz": 1005072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887392, - "linear_dense_total": 4718592, - "linear_nnz": 1265136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118128, - "linear_dense_total": 4718592, - "linear_nnz": 284976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110272, - "linear_dense_total": 4718592, - "linear_nnz": 223360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009968, - "linear_dense_total": 4718592, - "linear_nnz": 1463488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 989184, - "linear_dense_total": 4718592, - "linear_nnz": 1538448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 907024, - "linear_dense_total": 4718592, - "linear_nnz": 1387552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846224, - "linear_dense_total": 4718592, - "linear_nnz": 1243792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 670144, - "linear_dense_total": 4718592, - "linear_nnz": 1044112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 394160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457328, - "linear_dense_total": 4718592, - "linear_nnz": 851488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280864, - "linear_dense_total": 4718592, - "linear_nnz": 543232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 222176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101472, - "linear_dense_total": 4718592, - "linear_nnz": 323648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11174304, - "linear_sparsity": 86.84364601417825, - "linear_total": 84934656, - "nnz": 35101310, - "total": 108893186, - "total_sparsity": 67.7653751447772 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.4077578051088, - "f1": 85.78500582028688 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.588402084350587, - "eval_elapsed_time": 32.04897632403299 - }, - "speedup": 1.5696177764204813, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 760240, - "linear_dense_total": 4718592, - "linear_nnz": 1004320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887488, - "linear_dense_total": 4718592, - "linear_nnz": 1264816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 117888, - "linear_dense_total": 4718592, - "linear_nnz": 284528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110240, - "linear_dense_total": 4718592, - "linear_nnz": 223296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009680, - "linear_dense_total": 4718592, - "linear_nnz": 1463360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 988176, - "linear_dense_total": 4718592, - "linear_nnz": 1537232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1386720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846544, - "linear_dense_total": 4718592, - "linear_nnz": 1244032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669920, - "linear_dense_total": 4718592, - "linear_nnz": 1043552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457008, - "linear_dense_total": 4718592, - "linear_nnz": 850736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280816, - "linear_dense_total": 4718592, - "linear_nnz": 543088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 221824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101360, - "linear_dense_total": 4718592, - "linear_nnz": 323184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11168864, - "linear_sparsity": 86.85005093798226, - "linear_total": 84934656, - "nnz": 35095854, - "total": 108893186, - "total_sparsity": 67.77038555929478 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.02730364897265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.553753234863283, - "eval_elapsed_time": 36.97127141384408 - }, - "speedup": 1.3059049623464731, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2103872, - "linear_dense_total": 4718592, - "linear_nnz": 2737920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2320064, - "linear_dense_total": 4718592, - "linear_nnz": 2982272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 398848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 698368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 298560, - "linear_dense_total": 4718592, - "linear_nnz": 561536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2637888, - "linear_dense_total": 4718592, - "linear_nnz": 3613632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2681408, - "linear_dense_total": 4718592, - "linear_nnz": 3788800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1248448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2625472, - "linear_dense_total": 4718592, - "linear_nnz": 3873920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1182592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558784, - "linear_dense_total": 4718592, - "linear_nnz": 3741376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1016896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2130624, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 915648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523904, - "linear_dense_total": 4718592, - "linear_nnz": 2439552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 820288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827456, - "linear_dense_total": 4718592, - "linear_nnz": 1647744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305408, - "linear_dense_total": 4718592, - "linear_nnz": 819584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30052224, - "linear_sparsity": 64.61724175347221, - "linear_total": 84934656, - "nnz": 53991210, - "total": 108893186, - "total_sparsity": 50.418192374314394 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 87.861684752796 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.255816642761232, - "eval_elapsed_time": 36.84984774328768 - }, - "speedup": 1.319204091160467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 633664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2102592, - "linear_dense_total": 4718592, - "linear_nnz": 2736256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2319616, - "linear_dense_total": 4718592, - "linear_nnz": 2981952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 396032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297856, - "linear_dense_total": 4718592, - "linear_nnz": 693888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297792, - "linear_dense_total": 4718592, - "linear_nnz": 560000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2636544, - "linear_dense_total": 4718592, - "linear_nnz": 3611840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2680128, - "linear_dense_total": 4718592, - "linear_nnz": 3788096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1247936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2623936, - "linear_dense_total": 4718592, - "linear_nnz": 3871872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1181888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558208, - "linear_dense_total": 4718592, - "linear_nnz": 3740096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1015040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2132480, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 913792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523328, - "linear_dense_total": 4718592, - "linear_nnz": 2437120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 818752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827264, - "linear_dense_total": 4718592, - "linear_nnz": 1646016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304640, - "linear_dense_total": 4718592, - "linear_nnz": 819008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30033664, - "linear_sparsity": 64.6390938464506, - "linear_total": 84934656, - "nnz": 53972650, - "total": 108893186, - "total_sparsity": 50.4352365996528 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l20-dl1--2021-01-24--15-46-47/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.05392620624409, - "f1": 86.84949475139184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.667898628234862, - "eval_elapsed_time": 32.10200677579269 - }, - "speedup": 1.5645594133095706, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 407936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1088064, - "linear_dense_total": 4718592, - "linear_nnz": 1496000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 569088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1378944, - "linear_dense_total": 4718592, - "linear_nnz": 1948032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 298112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181568, - "linear_dense_total": 4718592, - "linear_nnz": 479680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 185728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199488, - "linear_dense_total": 4718592, - "linear_nnz": 385216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 770560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1695552, - "linear_dense_total": 4718592, - "linear_nnz": 2466112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 902848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1680512, - "linear_dense_total": 4718592, - "linear_nnz": 2583360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1624640, - "linear_dense_total": 4718592, - "linear_nnz": 2537856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 749440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1534912, - "linear_dense_total": 4718592, - "linear_nnz": 2284352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 684480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1190976, - "linear_dense_total": 4718592, - "linear_nnz": 1875456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 672320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815872, - "linear_dense_total": 4718592, - "linear_nnz": 1488192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 570176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 399104, - "linear_dense_total": 4718592, - "linear_nnz": 969280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167744, - "linear_dense_total": 4718592, - "linear_nnz": 513408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19026944, - "linear_sparsity": 77.59813850308642, - "linear_total": 84934656, - "nnz": 42955274, - "total": 108893186, - "total_sparsity": 60.55283569350244 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l40-dl1--2021-01-24--15-47-15/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.87795648060549, - "f1": 85.16652519097626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.238733966827393, - "eval_elapsed_time": 26.43846725206822 - }, - "speedup": 2.0060775865978457, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 330432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 520000, - "linear_dense_total": 4718592, - "linear_nnz": 850432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 468224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 724864, - "linear_dense_total": 4718592, - "linear_nnz": 1193088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 206912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137088, - "linear_dense_total": 4718592, - "linear_nnz": 344000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 127744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 128064, - "linear_dense_total": 4718592, - "linear_nnz": 255808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 511104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 975680, - "linear_dense_total": 4718592, - "linear_nnz": 1486784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 688192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 908032, - "linear_dense_total": 4718592, - "linear_nnz": 1596224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 551360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863296, - "linear_dense_total": 4718592, - "linear_nnz": 1414656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 466304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 787328, - "linear_dense_total": 4718592, - "linear_nnz": 1253632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 451840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 695488, - "linear_dense_total": 4718592, - "linear_nnz": 1147328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 475840, - "linear_dense_total": 4718592, - "linear_nnz": 973760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 302528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 217600, - "linear_dense_total": 4718592, - "linear_nnz": 520128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 255168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 91264, - "linear_dense_total": 4718592, - "linear_nnz": 346432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11382272, - "linear_sparsity": 86.59878954475309, - "linear_total": 84934656, - "nnz": 35298682, - "total": 108893186, - "total_sparsity": 67.5841222976064 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.35425478567389 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.427229469299316, - "eval_elapsed_time": 30.796412555966526 - }, - "speedup": 1.6474160145973682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 880896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2828544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 849152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2819840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1222144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3367424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1352448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3521280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1524992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3693824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1511680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3554560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1336320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2935296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2452992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1134080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1836032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30067968, - "linear_sparsity": 64.59870515046296, - "linear_total": 84934656, - "nnz": 53990689, - "total": 108893186, - "total_sparsity": 50.41867082482094 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.11636707663197, - "f1": 88.26635621180897 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.067204750061034, - "eval_elapsed_time": 30.552880198229104 - }, - "speedup": 1.6731282972319816, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 878336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2825984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 852736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2823424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 385792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1210624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1168384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3366400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1360384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3529216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1525248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3694080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1519360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1345792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2944768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1175296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2450176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1126912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1828864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1023488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30071296, - "linear_sparsity": 64.5947868441358, - "linear_total": 84934656, - "nnz": 53994017, - "total": 108893186, - "total_sparsity": 50.41561461889819 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.29241912882233 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.73566310119629, - "eval_elapsed_time": 37.101448519621044 - }, - "speedup": 1.2979160032189903, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 466432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896491, - "linear_dense_total": 4718592, - "linear_nnz": 3362923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933262, - "linear_dense_total": 4718592, - "linear_nnz": 3511822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 353792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168742, - "linear_dense_total": 4718592, - "linear_nnz": 1522534, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 204032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632549, - "linear_dense_total": 4718592, - "linear_nnz": 836581, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 636672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005770, - "linear_dense_total": 4718592, - "linear_nnz": 3642442, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 857344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985991, - "linear_dense_total": 4718592, - "linear_nnz": 3843335, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 829184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2939127, - "linear_dense_total": 4718592, - "linear_nnz": 3768311, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 754432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915969, - "linear_dense_total": 4718592, - "linear_nnz": 3670401, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787598, - "linear_dense_total": 4718592, - "linear_nnz": 3555086, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 752640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497594, - "linear_dense_total": 4718592, - "linear_nnz": 3250234, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115777, - "linear_dense_total": 4718592, - "linear_nnz": 2669249, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490216, - "linear_dense_total": 4718592, - "linear_nnz": 1903656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35536574, - "linear_sparsity": 58.160101337197375, - "linear_total": 84934656, - "nnz": 59478503, - "total": 108893186, - "total_sparsity": 45.379040521415185 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.77578051087986, - "f1": 88.22778160568927 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.759838722229006, - "eval_elapsed_time": 37.11843426898122 - }, - "speedup": 1.2968616317313288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896466, - "linear_dense_total": 4718592, - "linear_nnz": 3365714, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 574976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933134, - "linear_dense_total": 4718592, - "linear_nnz": 3508110, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 355584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168698, - "linear_dense_total": 4718592, - "linear_nnz": 1524282, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 201472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632483, - "linear_dense_total": 4718592, - "linear_nnz": 833955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 634624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005666, - "linear_dense_total": 4718592, - "linear_nnz": 3640290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985914, - "linear_dense_total": 4718592, - "linear_nnz": 3837370, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 830720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2938982, - "linear_dense_total": 4718592, - "linear_nnz": 3769702, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 756480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915873, - "linear_dense_total": 4718592, - "linear_nnz": 3672353, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 768256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787463, - "linear_dense_total": 4718592, - "linear_nnz": 3555719, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 753408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497485, - "linear_dense_total": 4718592, - "linear_nnz": 3250893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115685, - "linear_dense_total": 4718592, - "linear_nnz": 2666597, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490132, - "linear_dense_total": 4718592, - "linear_nnz": 1903316, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35528301, - "linear_sparsity": 58.16984176635742, - "linear_total": 84934656, - "nnz": 59470230, - "total": 108893186, - "total_sparsity": 45.38663787466004 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.92431409649953, - "f1": 87.57193515884181 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.83310959625244, - "eval_elapsed_time": 35.16166925104335 - }, - "speedup": 1.3866360448121684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 341248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332385, - "linear_dense_total": 4718592, - "linear_nnz": 2673633, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387588, - "linear_dense_total": 4718592, - "linear_nnz": 2850180, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 227328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646176, - "linear_dense_total": 4718592, - "linear_nnz": 873504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 128000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326046, - "linear_dense_total": 4718592, - "linear_nnz": 454046, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 412672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458384, - "linear_dense_total": 4718592, - "linear_nnz": 2871056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 692736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421331, - "linear_dense_total": 4718592, - "linear_nnz": 3114067, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 505088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348506, - "linear_dense_total": 4718592, - "linear_nnz": 2853594, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322654, - "linear_dense_total": 4718592, - "linear_nnz": 2871518, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 469504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138640, - "linear_dense_total": 4718592, - "linear_nnz": 2608144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 552448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830183, - "linear_dense_total": 4718592, - "linear_nnz": 2382631, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 316672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440503, - "linear_dense_total": 4718592, - "linear_nnz": 1757175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859209, - "linear_dense_total": 4718592, - "linear_nnz": 1151305, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26460853, - "linear_sparsity": 68.84563469592435, - "linear_total": 84934656, - "nnz": 50398933, - "total": 108893186, - "total_sparsity": 53.71709208691902 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.5280353923367 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.96729845428467, - "eval_elapsed_time": 35.3477450478822 - }, - "speedup": 1.3799828778048573, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332233, - "linear_dense_total": 4718592, - "linear_nnz": 2668105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387496, - "linear_dense_total": 4718592, - "linear_nnz": 2839080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 224768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646159, - "linear_dense_total": 4718592, - "linear_nnz": 870927, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 124672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325999, - "linear_dense_total": 4718592, - "linear_nnz": 450671, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 408576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458332, - "linear_dense_total": 4718592, - "linear_nnz": 2866908, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 682496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421186, - "linear_dense_total": 4718592, - "linear_nnz": 3103682, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348406, - "linear_dense_total": 4718592, - "linear_nnz": 2853238, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 558336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322448, - "linear_dense_total": 4718592, - "linear_nnz": 2880784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 475904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138474, - "linear_dense_total": 4718592, - "linear_nnz": 2614378, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830088, - "linear_dense_total": 4718592, - "linear_nnz": 2372808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 312576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440402, - "linear_dense_total": 4718592, - "linear_nnz": 1752978, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 288000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859129, - "linear_dense_total": 4718592, - "linear_nnz": 1147129, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26420688, - "linear_sparsity": 68.89292399088542, - "linear_total": 84934656, - "nnz": 50358753, - "total": 108893186, - "total_sparsity": 53.75399063078199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.86471144749291, - "f1": 86.87223379259328 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.916674156188964, - "eval_elapsed_time": 34.25446852017194 - }, - "speedup": 1.4338470191904102, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 211712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718621, - "linear_dense_total": 4718592, - "linear_nnz": 1930333, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 345600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1771278, - "linear_dense_total": 4718592, - "linear_nnz": 2116878, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 157696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325955, - "linear_dense_total": 4718592, - "linear_nnz": 483651, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 90368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164774, - "linear_dense_total": 4718592, - "linear_nnz": 255142, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 278016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1816807, - "linear_dense_total": 4718592, - "linear_nnz": 2094823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 493312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1772769, - "linear_dense_total": 4718592, - "linear_nnz": 2266081, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1682765, - "linear_dense_total": 4718592, - "linear_nnz": 1986893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635131, - "linear_dense_total": 4718592, - "linear_nnz": 1992507, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1457711, - "linear_dense_total": 4718592, - "linear_nnz": 1736239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 355072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1174807, - "linear_dense_total": 4718592, - "linear_nnz": 1529879, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 183552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867865, - "linear_dense_total": 4718592, - "linear_nnz": 1051417, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 439457, - "linear_dense_total": 4718592, - "linear_nnz": 636321, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18080164, - "linear_sparsity": 78.7128542676384, - "linear_total": 84934656, - "nnz": 42014844, - "total": 108893186, - "total_sparsity": 61.41646181607727 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.3434247871334, - "f1": 88.502960365548 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.458772911071776, - "eval_elapsed_time": 41.833797600120306 - }, - "speedup": 1.120016464456589, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 356016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2711219, - "linear_dense_total": 4718592, - "linear_nnz": 3067235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 506400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2753947, - "linear_dense_total": 4718592, - "linear_nnz": 3260347, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 305952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 956610, - "linear_dense_total": 4718592, - "linear_nnz": 1262562, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 532866, - "linear_dense_total": 4718592, - "linear_nnz": 705730, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 658880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2827796, - "linear_dense_total": 4718592, - "linear_nnz": 3486676, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 782176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2810214, - "linear_dense_total": 4718592, - "linear_nnz": 3592390, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 874272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2770460, - "linear_dense_total": 4718592, - "linear_nnz": 3644732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 772928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2750302, - "linear_dense_total": 4718592, - "linear_nnz": 3523230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2610331, - "linear_dense_total": 4718592, - "linear_nnz": 3378315, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2295378, - "linear_dense_total": 4718592, - "linear_nnz": 2983346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 596368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1868727, - "linear_dense_total": 4718592, - "linear_nnz": 2465095, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 404448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245775, - "linear_dense_total": 4718592, - "linear_nnz": 1650223, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 33019881, - "linear_sparsity": 61.12319451791268, - "linear_total": 84934656, - "nnz": 56967217, - "total": 108893186, - "total_sparsity": 47.6852325727709 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 88.09731480353894 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 31.47156787109375, - "eval_elapsed_time": 38.88521202793345 - }, - "speedup": 1.2263257160702048, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 233808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2108257, - "linear_dense_total": 4718592, - "linear_nnz": 2342065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 370912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2165809, - "linear_dense_total": 4718592, - "linear_nnz": 2536721, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 189856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496337, - "linear_dense_total": 4718592, - "linear_nnz": 686193, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 106192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 273404, - "linear_dense_total": 4718592, - "linear_nnz": 379596, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 368864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2238488, - "linear_dense_total": 4718592, - "linear_nnz": 2607352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 528528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2212294, - "linear_dense_total": 4718592, - "linear_nnz": 2740822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 515168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2147598, - "linear_dense_total": 4718592, - "linear_nnz": 2662766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 456576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2125672, - "linear_dense_total": 4718592, - "linear_nnz": 2582248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 426512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1957790, - "linear_dense_total": 4718592, - "linear_nnz": 2384302, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 424416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1621523, - "linear_dense_total": 4718592, - "linear_nnz": 2045939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 311248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1220304, - "linear_dense_total": 4718592, - "linear_nnz": 1531552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 249120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 687520, - "linear_dense_total": 4718592, - "linear_nnz": 936640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23436196, - "linear_sparsity": 72.40679234634212, - "linear_total": 84934656, - "nnz": 47377613, - "total": 108893186, - "total_sparsity": 56.49166422589565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.22039562207584 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.016168815612794, - "eval_elapsed_time": 36.33264479693025 - }, - "speedup": 1.3300995472773969, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 145232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1501972, - "linear_dense_total": 4718592, - "linear_nnz": 1647204, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 280192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1562394, - "linear_dense_total": 4718592, - "linear_nnz": 1842586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 126288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 233713, - "linear_dense_total": 4718592, - "linear_nnz": 360001, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 73824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 141408, - "linear_dense_total": 4718592, - "linear_nnz": 215232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 234064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607786, - "linear_dense_total": 4718592, - "linear_nnz": 1841850, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 386752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1573980, - "linear_dense_total": 4718592, - "linear_nnz": 1960732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 281632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495134, - "linear_dense_total": 4718592, - "linear_nnz": 1776766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 288320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1455910, - "linear_dense_total": 4718592, - "linear_nnz": 1744230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 240864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278042, - "linear_dense_total": 4718592, - "linear_nnz": 1518906, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 275424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000907, - "linear_dense_total": 4718592, - "linear_nnz": 1276331, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 170816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 708174, - "linear_dense_total": 4718592, - "linear_nnz": 878990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 165920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330996, - "linear_dense_total": 4718592, - "linear_nnz": 496916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15559744, - "linear_sparsity": 81.68033552758487, - "linear_total": 84934656, - "nnz": 39496838, - "total": 108893186, - "total_sparsity": 63.728825052469304 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.12961210974456, - "f1": 87.04337592394437 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.089330375671388, - "eval_elapsed_time": 36.40407280996442 - }, - "speedup": 1.3267542603060118, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492013, - "linear_dense_total": 4718592, - "linear_nnz": 1634237, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552665, - "linear_dense_total": 4718592, - "linear_nnz": 1828361, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 124096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231707, - "linear_dense_total": 4718592, - "linear_nnz": 355803, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140436, - "linear_dense_total": 4718592, - "linear_nnz": 213044, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597983, - "linear_dense_total": 4718592, - "linear_nnz": 1825967, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563950, - "linear_dense_total": 4718592, - "linear_nnz": 1943566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 275824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485631, - "linear_dense_total": 4718592, - "linear_nnz": 1761455, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446554, - "linear_dense_total": 4718592, - "linear_nnz": 1729290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269099, - "linear_dense_total": 4718592, - "linear_nnz": 1504955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993705, - "linear_dense_total": 4718592, - "linear_nnz": 1263225, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702880, - "linear_dense_total": 4718592, - "linear_nnz": 870496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328143, - "linear_dense_total": 4718592, - "linear_nnz": 489695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15420094, - "linear_sparsity": 81.84475604398752, - "linear_total": 84934656, - "nnz": 39357122, - "total": 108893186, - "total_sparsity": 63.85713060135829 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.09176915799432, - "f1": 86.93076968810146 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.182387649536132, - "eval_elapsed_time": 36.50873678829521 - }, - "speedup": 1.3225234846739682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491817, - "linear_dense_total": 4718592, - "linear_nnz": 1634041, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552458, - "linear_dense_total": 4718592, - "linear_nnz": 1828346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 123920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231690, - "linear_dense_total": 4718592, - "linear_nnz": 355610, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140404, - "linear_dense_total": 4718592, - "linear_nnz": 212916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597816, - "linear_dense_total": 4718592, - "linear_nnz": 1825560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563794, - "linear_dense_total": 4718592, - "linear_nnz": 1942802, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 276192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485468, - "linear_dense_total": 4718592, - "linear_nnz": 1761660, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446397, - "linear_dense_total": 4718592, - "linear_nnz": 1728493, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1268987, - "linear_dense_total": 4718592, - "linear_nnz": 1504843, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993538, - "linear_dense_total": 4718592, - "linear_nnz": 1262994, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702743, - "linear_dense_total": 4718592, - "linear_nnz": 870263, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328079, - "linear_dense_total": 4718592, - "linear_nnz": 489503, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15417031, - "linear_sparsity": 81.84836234575437, - "linear_total": 84934656, - "nnz": 39354055, - "total": 108893186, - "total_sparsity": 63.859947122862216 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.00283822138127, - "f1": 88.2671108560581 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.23066467285156, - "eval_elapsed_time": 39.6229472043924 - }, - "speedup": 1.1974432856757005, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 405824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826999, - "linear_dense_total": 4718592, - "linear_nnz": 3232823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868775, - "linear_dense_total": 4718592, - "linear_nnz": 3412647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 325760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081633, - "linear_dense_total": 4718592, - "linear_nnz": 1407393, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591690, - "linear_dense_total": 4718592, - "linear_nnz": 765706, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 613248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943603, - "linear_dense_total": 4718592, - "linear_nnz": 3556851, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 791424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916774, - "linear_dense_total": 4718592, - "linear_nnz": 3708198, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 819072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876887, - "linear_dense_total": 4718592, - "linear_nnz": 3695959, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 788928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855670, - "linear_dense_total": 4718592, - "linear_nnz": 3644598, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724886, - "linear_dense_total": 4718592, - "linear_nnz": 3486486, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427996, - "linear_dense_total": 4718592, - "linear_nnz": 3114460, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 602496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013542, - "linear_dense_total": 4718592, - "linear_nnz": 2616038, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394109, - "linear_dense_total": 4718592, - "linear_nnz": 1775741, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34416900, - "linear_sparsity": 59.47837829589844, - "linear_total": 84934656, - "nnz": 58360680, - "total": 108893186, - "total_sparsity": 46.405572153982156 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.01229895931883, - "f1": 88.16022239737082 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.31462289428711, - "eval_elapsed_time": 39.686994375661016 - }, - "speedup": 1.1943321489972945, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 404736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826896, - "linear_dense_total": 4718592, - "linear_nnz": 3231632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868676, - "linear_dense_total": 4718592, - "linear_nnz": 3411716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 322624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081551, - "linear_dense_total": 4718592, - "linear_nnz": 1404175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591605, - "linear_dense_total": 4718592, - "linear_nnz": 763893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 614464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943501, - "linear_dense_total": 4718592, - "linear_nnz": 3557965, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 790144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916630, - "linear_dense_total": 4718592, - "linear_nnz": 3706774, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 816832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876748, - "linear_dense_total": 4718592, - "linear_nnz": 3693580, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 785920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855585, - "linear_dense_total": 4718592, - "linear_nnz": 3641505, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724738, - "linear_dense_total": 4718592, - "linear_nnz": 3484162, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427854, - "linear_dense_total": 4718592, - "linear_nnz": 3114894, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 603648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013418, - "linear_dense_total": 4718592, - "linear_nnz": 2617066, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 379328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394031, - "linear_dense_total": 4718592, - "linear_nnz": 1773359, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34400721, - "linear_sparsity": 59.49742705733687, - "linear_total": 84934656, - "nnz": 58344499, - "total": 108893186, - "total_sparsity": 46.42043166961797 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.15137180700094, - "f1": 87.62280270760408 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.9650231628418, - "eval_elapsed_time": 36.364678455051035 - }, - "speedup": 1.3324482010041157, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 278464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2254373, - "linear_dense_total": 4718592, - "linear_nnz": 2532837, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 411200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2313203, - "linear_dense_total": 4718592, - "linear_nnz": 2724403, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 587562, - "linear_dense_total": 4718592, - "linear_nnz": 795434, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 115648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304918, - "linear_dense_total": 4718592, - "linear_nnz": 420566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 388544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2383637, - "linear_dense_total": 4718592, - "linear_nnz": 2772181, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 616064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2346825, - "linear_dense_total": 4718592, - "linear_nnz": 2962889, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 475392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281407, - "linear_dense_total": 4718592, - "linear_nnz": 2756799, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 485760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2255524, - "linear_dense_total": 4718592, - "linear_nnz": 2741284, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 436416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2089830, - "linear_dense_total": 4718592, - "linear_nnz": 2526246, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1759353, - "linear_dense_total": 4718592, - "linear_nnz": 2233017, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360596, - "linear_dense_total": 4718592, - "linear_nnz": 1652692, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 260864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 795671, - "linear_dense_total": 4718592, - "linear_nnz": 1056535, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25174883, - "linear_sparsity": 70.35970452391072, - "linear_total": 84934656, - "nnz": 49113499, - "total": 108893186, - "total_sparsity": 54.89754611459343 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.59981078524125, - "f1": 86.70965342219107 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.427432876586913, - "eval_elapsed_time": 34.77788851317018 - }, - "speedup": 1.407145655192423, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 185152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640271, - "linear_dense_total": 4718592, - "linear_nnz": 1825423, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 309376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701856, - "linear_dense_total": 4718592, - "linear_nnz": 2011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283313, - "linear_dense_total": 4718592, - "linear_nnz": 423537, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 154892, - "linear_dense_total": 4718592, - "linear_nnz": 237196, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1747153, - "linear_dense_total": 4718592, - "linear_nnz": 2013521, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 452288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1699193, - "linear_dense_total": 4718592, - "linear_nnz": 2151481, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 315584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1622345, - "linear_dense_total": 4718592, - "linear_nnz": 1937929, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 324160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1582184, - "linear_dense_total": 4718592, - "linear_nnz": 1906344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 264448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1396319, - "linear_dense_total": 4718592, - "linear_nnz": 1660767, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 312704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106218, - "linear_dense_total": 4718592, - "linear_nnz": 1418922, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 176128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 797060, - "linear_dense_total": 4718592, - "linear_nnz": 973188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 178368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 396240, - "linear_dense_total": 4718592, - "linear_nnz": 574608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17134148, - "linear_sparsity": 79.82667051715615, - "linear_total": 84934656, - "nnz": 41069735, - "total": 108893186, - "total_sparsity": 62.28438480989986 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.78902554399244, - "f1": 86.80367154149816 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.48367044067383, - "eval_elapsed_time": 34.82450146274641 - }, - "speedup": 1.404266329298368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 181120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630256, - "linear_dense_total": 4718592, - "linear_nnz": 1811376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 307392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692246, - "linear_dense_total": 4718592, - "linear_nnz": 1999638, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 136448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281138, - "linear_dense_total": 4718592, - "linear_nnz": 417586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153935, - "linear_dense_total": 4718592, - "linear_nnz": 236239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737446, - "linear_dense_total": 4718592, - "linear_nnz": 2004326, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689801, - "linear_dense_total": 4718592, - "linear_nnz": 2132105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 309632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1613097, - "linear_dense_total": 4718592, - "linear_nnz": 1922729, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 313664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572856, - "linear_dense_total": 4718592, - "linear_nnz": 1886520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 259072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387577, - "linear_dense_total": 4718592, - "linear_nnz": 1646649, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 306112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098827, - "linear_dense_total": 4718592, - "linear_nnz": 1404939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 173184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791446, - "linear_dense_total": 4718592, - "linear_nnz": 964630, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 172928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393190, - "linear_dense_total": 4718592, - "linear_nnz": 566118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16992855, - "linear_sparsity": 79.99302546183267, - "linear_total": 84934656, - "nnz": 40928357, - "total": 108893186, - "total_sparsity": 62.414216625088 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.6092715231788, - "f1": 86.70267601348202 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.478721130371095, - "eval_elapsed_time": 34.80613293591887 - }, - "speedup": 1.4045192577290035, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 180736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630123, - "linear_dense_total": 4718592, - "linear_nnz": 1810859, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 305920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692103, - "linear_dense_total": 4718592, - "linear_nnz": 1998023, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 135616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281100, - "linear_dense_total": 4718592, - "linear_nnz": 416716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 81536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153912, - "linear_dense_total": 4718592, - "linear_nnz": 235448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 263936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737263, - "linear_dense_total": 4718592, - "linear_nnz": 2001199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689622, - "linear_dense_total": 4718592, - "linear_nnz": 2132118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 306304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1612927, - "linear_dense_total": 4718592, - "linear_nnz": 1919231, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 312128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572769, - "linear_dense_total": 4718592, - "linear_nnz": 1884897, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387454, - "linear_dense_total": 4718592, - "linear_nnz": 1645758, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 305856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098709, - "linear_dense_total": 4718592, - "linear_nnz": 1404565, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 172480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791310, - "linear_dense_total": 4718592, - "linear_nnz": 963790, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 170944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393127, - "linear_dense_total": 4718592, - "linear_nnz": 564071, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16976675, - "linear_sparsity": 80.01207540064682, - "linear_total": 84934656, - "nnz": 40912185, - "total": 108893186, - "total_sparsity": 62.42906787574385 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.34112193061533 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 30.13610975646973, - "eval_elapsed_time": 37.54532916797325 - }, - "speedup": 1.2806693802635063, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 517888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4068608, - "linear_dense_total": 4718592, - "linear_nnz": 4586496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 641536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4202752, - "linear_dense_total": 4718592, - "linear_nnz": 4844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 415488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090304, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 254720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 947200, - "linear_dense_total": 4718592, - "linear_nnz": 1201920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 841472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4313856, - "linear_dense_total": 4718592, - "linear_nnz": 5155328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1072896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4336128, - "linear_dense_total": 4718592, - "linear_nnz": 5409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1068800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4317184, - "linear_dense_total": 4718592, - "linear_nnz": 5385984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 961792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5272832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 986880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4141568, - "linear_dense_total": 4718592, - "linear_nnz": 5128448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 905472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3820032, - "linear_dense_total": 4718592, - "linear_nnz": 4725504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 756224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3085568, - "linear_dense_total": 4718592, - "linear_nnz": 3841792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 463360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1416448, - "linear_dense_total": 4718592, - "linear_nnz": 1879808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48937216, - "linear_sparsity": 42.38251109182099, - "linear_total": 84934656, - "nnz": 72878482, - "total": 108893186, - "total_sparsity": 33.07342297799975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.51569063636161 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.3544778213501, - "eval_elapsed_time": 33.69302155217156 - }, - "speedup": 1.4644339860190774, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3448576, - "linear_dense_total": 4718592, - "linear_nnz": 3867392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696384, - "linear_dense_total": 4718592, - "linear_nnz": 4250112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 291584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 654592, - "linear_dense_total": 4718592, - "linear_nnz": 946176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 168960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 562432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3956992, - "linear_dense_total": 4718592, - "linear_nnz": 4519424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3982336, - "linear_dense_total": 4718592, - "linear_nnz": 4809728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3912960, - "linear_dense_total": 4718592, - "linear_nnz": 4702976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 701696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3899648, - "linear_dense_total": 4718592, - "linear_nnz": 4601344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 667392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3576064, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 700416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2968832, - "linear_dense_total": 4718592, - "linear_nnz": 3669248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 437504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966592, - "linear_dense_total": 4718592, - "linear_nnz": 2404096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 361472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 693504, - "linear_dense_total": 4718592, - "linear_nnz": 1054976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39853312, - "linear_sparsity": 53.077678915895056, - "linear_total": 84934656, - "nnz": 63788226, - "total": 108893186, - "total_sparsity": 41.42128782970864 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.55534531693472, - "f1": 87.439750439335 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.32847610473633, - "eval_elapsed_time": 33.60846929671243 - }, - "speedup": 1.4658802450943298, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 416256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3444992, - "linear_dense_total": 4718592, - "linear_nnz": 3861248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 541952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3708416, - "linear_dense_total": 4718592, - "linear_nnz": 4250368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 285184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 647936, - "linear_dense_total": 4718592, - "linear_nnz": 933120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 611328, - "linear_dense_total": 4718592, - "linear_nnz": 785408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 555520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3977216, - "linear_dense_total": 4718592, - "linear_nnz": 4532736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 802816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4019968, - "linear_dense_total": 4718592, - "linear_nnz": 4822784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939840, - "linear_dense_total": 4718592, - "linear_nnz": 4714240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 686592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3897600, - "linear_dense_total": 4718592, - "linear_nnz": 4584192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 656384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3568640, - "linear_dense_total": 4718592, - "linear_nnz": 4225024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 676864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2957312, - "linear_dense_total": 4718592, - "linear_nnz": 3634176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 432640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1931264, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1030400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39737600, - "linear_sparsity": 53.213915412808646, - "linear_total": 84934656, - "nnz": 63672482, - "total": 108893186, - "total_sparsity": 41.52757914531035 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.11731315042573, - "f1": 86.14927876930865 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.35162329864502, - "eval_elapsed_time": 30.60480569722131 - }, - "speedup": 1.6527498971607057, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 331008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354688, - "linear_dense_total": 4718592, - "linear_nnz": 2685696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 432384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826240, - "linear_dense_total": 4718592, - "linear_nnz": 3258624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 203008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 415744, - "linear_dense_total": 4718592, - "linear_nnz": 618752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423168, - "linear_dense_total": 4718592, - "linear_nnz": 535296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3302144, - "linear_dense_total": 4718592, - "linear_nnz": 3726080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 669440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3248128, - "linear_dense_total": 4718592, - "linear_nnz": 3917568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 453632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3193600, - "linear_dense_total": 4718592, - "linear_nnz": 3647232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 473856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3119616, - "linear_dense_total": 4718592, - "linear_nnz": 3593472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 445952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2493696, - "linear_dense_total": 4718592, - "linear_nnz": 2939648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 490752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1891072, - "linear_dense_total": 4718592, - "linear_nnz": 2381824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 275712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1108736, - "linear_dense_total": 4718592, - "linear_nnz": 1384448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348928, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29295872, - "linear_sparsity": 65.5077522183642, - "linear_total": 84934656, - "nnz": 53223538, - "total": 108893186, - "total_sparsity": 51.12316945157615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.58172107792693 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.2993692779541, - "eval_elapsed_time": 41.87211530236527 - }, - "speedup": 1.1252216532791355, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 428592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3980096, - "linear_dense_total": 4718592, - "linear_nnz": 4408688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 545744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4015584, - "linear_dense_total": 4718592, - "linear_nnz": 4561328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 329968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2092032, - "linear_dense_total": 4718592, - "linear_nnz": 2422000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 190816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1335104, - "linear_dense_total": 4718592, - "linear_nnz": 1525920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 729664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4061440, - "linear_dense_total": 4718592, - "linear_nnz": 4791104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4062640, - "linear_dense_total": 4718592, - "linear_nnz": 4914112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 960992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4047744, - "linear_dense_total": 4718592, - "linear_nnz": 5008736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 902768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006096, - "linear_dense_total": 4718592, - "linear_nnz": 4908864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 861120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3920672, - "linear_dense_total": 4718592, - "linear_nnz": 4781792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 759664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3732848, - "linear_dense_total": 4718592, - "linear_nnz": 4492512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 670096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3391392, - "linear_dense_total": 4718592, - "linear_nnz": 4061488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 444064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2661776, - "linear_dense_total": 4718592, - "linear_nnz": 3105840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48982384, - "linear_sparsity": 42.329331386236504, - "linear_total": 84934656, - "nnz": 72930262, - "total": 108893186, - "total_sparsity": 33.025871793300276 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.52980132450331, - "f1": 88.02284574429551 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.3459995803833, - "eval_elapsed_time": 40.03914254019037 - }, - "speedup": 1.1931736074335828, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 261808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3592944, - "linear_dense_total": 4718592, - "linear_nnz": 3854752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 407856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3665376, - "linear_dense_total": 4718592, - "linear_nnz": 4073232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 212544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1095184, - "linear_dense_total": 4718592, - "linear_nnz": 1307728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 122704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702496, - "linear_dense_total": 4718592, - "linear_nnz": 825200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 470352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3747664, - "linear_dense_total": 4718592, - "linear_nnz": 4218016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 586320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3731872, - "linear_dense_total": 4718592, - "linear_nnz": 4318192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 598112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696160, - "linear_dense_total": 4718592, - "linear_nnz": 4294272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3650592, - "linear_dense_total": 4718592, - "linear_nnz": 4191568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 518320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3500640, - "linear_dense_total": 4718592, - "linear_nnz": 4018960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 494608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3197872, - "linear_dense_total": 4718592, - "linear_nnz": 3692480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2682864, - "linear_dense_total": 4718592, - "linear_nnz": 3064736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 281888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600800, - "linear_dense_total": 4718592, - "linear_nnz": 1882688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39741824, - "linear_sparsity": 53.208942177854944, - "linear_total": 84934656, - "nnz": 63685078, - "total": 108893186, - "total_sparsity": 41.51601184669167 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.09460737937559, - "f1": 87.80889686617203 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.19205239105224, - "eval_elapsed_time": 39.82947535999119 - }, - "speedup": 1.1988795413397866, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 258016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3584960, - "linear_dense_total": 4718592, - "linear_nnz": 3842976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 404784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3659360, - "linear_dense_total": 4718592, - "linear_nnz": 4064144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 209136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1083920, - "linear_dense_total": 4718592, - "linear_nnz": 1293056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 120976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697408, - "linear_dense_total": 4718592, - "linear_nnz": 818384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3741328, - "linear_dense_total": 4718592, - "linear_nnz": 4202080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 577184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3724032, - "linear_dense_total": 4718592, - "linear_nnz": 4301216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 587792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3689648, - "linear_dense_total": 4718592, - "linear_nnz": 4277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 530480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3641984, - "linear_dense_total": 4718592, - "linear_nnz": 4172464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 508336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3491408, - "linear_dense_total": 4718592, - "linear_nnz": 3999744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 486304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3187056, - "linear_dense_total": 4718592, - "linear_nnz": 3673360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 374032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2669344, - "linear_dense_total": 4718592, - "linear_nnz": 3043376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 276992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1586976, - "linear_dense_total": 4718592, - "linear_nnz": 1863968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39552208, - "linear_sparsity": 53.432191448447156, - "linear_total": 84934656, - "nnz": 63495382, - "total": 108893186, - "total_sparsity": 41.69021558428826 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.64049195837275, - "f1": 87.31499809166372 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.556625274658202, - "eval_elapsed_time": 36.13367621740326 - }, - "speedup": 1.3515039902008532, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 172416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2978704, - "linear_dense_total": 4718592, - "linear_nnz": 3151120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 308192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3103168, - "linear_dense_total": 4718592, - "linear_nnz": 3411360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526736, - "linear_dense_total": 4718592, - "linear_nnz": 667120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 84608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377248, - "linear_dense_total": 4718592, - "linear_nnz": 461856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3205568, - "linear_dense_total": 4718592, - "linear_nnz": 3491136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 437904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3165264, - "linear_dense_total": 4718592, - "linear_nnz": 3603168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 321040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3089840, - "linear_dense_total": 4718592, - "linear_nnz": 3410880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 332784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3023632, - "linear_dense_total": 4718592, - "linear_nnz": 3356416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 288464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2784432, - "linear_dense_total": 4718592, - "linear_nnz": 3072896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 328464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354768, - "linear_dense_total": 4718592, - "linear_nnz": 2683232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 204832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1752368, - "linear_dense_total": 4718592, - "linear_nnz": 1957200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 189616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 715936, - "linear_dense_total": 4718592, - "linear_nnz": 905552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30171936, - "linear_sparsity": 64.47629575376158, - "linear_total": 84934656, - "nnz": 54109530, - "total": 108893186, - "total_sparsity": 50.30953543778212 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.10690633869442, - "f1": 88.3744311515211 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.22343955230713, - "eval_elapsed_time": 39.62965265568346 - }, - "speedup": 1.1977117757004876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004672, - "linear_dense_total": 4718592, - "linear_nnz": 4451008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4077632, - "linear_dense_total": 4718592, - "linear_nnz": 4674880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517184, - "linear_dense_total": 4718592, - "linear_nnz": 1879872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 218432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1064384, - "linear_dense_total": 4718592, - "linear_nnz": 1282816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 799296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155072, - "linear_dense_total": 4718592, - "linear_nnz": 4954368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 950208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165440, - "linear_dense_total": 4718592, - "linear_nnz": 5115648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1022400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152448, - "linear_dense_total": 4718592, - "linear_nnz": 5174848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 914368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4106624, - "linear_dense_total": 4718592, - "linear_nnz": 5020992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 918208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3961088, - "linear_dense_total": 4718592, - "linear_nnz": 4879296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 832704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3684992, - "linear_dense_total": 4718592, - "linear_nnz": 4517696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 715648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207040, - "linear_dense_total": 4718592, - "linear_nnz": 3922688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2117440, - "linear_dense_total": 4718592, - "linear_nnz": 2584512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48458624, - "linear_sparsity": 42.94599368248457, - "linear_total": 84934656, - "nnz": 72403618, - "total": 108893186, - "total_sparsity": 33.50950536060172 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.82308420056765, - "f1": 88.21300800880684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.25489320373535, - "eval_elapsed_time": 39.64649308426306 - }, - "speedup": 1.1965438162077555, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004864, - "linear_dense_total": 4718592, - "linear_nnz": 4450944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076928, - "linear_dense_total": 4718592, - "linear_nnz": 4674240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517376, - "linear_dense_total": 4718592, - "linear_nnz": 1879424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1063808, - "linear_dense_total": 4718592, - "linear_nnz": 1281024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 800192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155456, - "linear_dense_total": 4718592, - "linear_nnz": 4955648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 948864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165760, - "linear_dense_total": 4718592, - "linear_nnz": 5114624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1019200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152640, - "linear_dense_total": 4718592, - "linear_nnz": 5171840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 915392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4108416, - "linear_dense_total": 4718592, - "linear_nnz": 5023808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 916160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3960384, - "linear_dense_total": 4718592, - "linear_nnz": 4876544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 834176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3685056, - "linear_dense_total": 4718592, - "linear_nnz": 4519232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 713856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207936, - "linear_dense_total": 4718592, - "linear_nnz": 3921792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 465600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115456, - "linear_dense_total": 4718592, - "linear_nnz": 2581056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48450176, - "linear_sparsity": 42.95594015239198, - "linear_total": 84934656, - "nnz": 72395170, - "total": 108893186, - "total_sparsity": 33.51726342179023 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.05676442762535, - "f1": 87.66615713942541 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.86345721435547, - "eval_elapsed_time": 36.22357800696045 - }, - "speedup": 1.3371368758339826, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 326336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3501120, - "linear_dense_total": 4718592, - "linear_nnz": 3827456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 487552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3653568, - "linear_dense_total": 4718592, - "linear_nnz": 4141120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 238208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 756608, - "linear_dense_total": 4718592, - "linear_nnz": 994816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 141568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 622848, - "linear_dense_total": 4718592, - "linear_nnz": 764416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 487616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3801472, - "linear_dense_total": 4718592, - "linear_nnz": 4289088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 712832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3800064, - "linear_dense_total": 4718592, - "linear_nnz": 4512896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 646272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3743872, - "linear_dense_total": 4718592, - "linear_nnz": 4390144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 625600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3691328, - "linear_dense_total": 4718592, - "linear_nnz": 4316928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 575808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3461056, - "linear_dense_total": 4718592, - "linear_nnz": 4036864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 579392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3012928, - "linear_dense_total": 4718592, - "linear_nnz": 3592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 405632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2347776, - "linear_dense_total": 4718592, - "linear_nnz": 2753408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001344, - "linear_dense_total": 4718592, - "linear_nnz": 1318784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 38938240, - "linear_sparsity": 54.1550624517747, - "linear_total": 84934656, - "nnz": 62877338, - "total": 108893186, - "total_sparsity": 42.257784614732465 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.76064333017976, - "f1": 86.75922108224064 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 25.933858947753908, - "eval_elapsed_time": 33.4375456799753 - }, - "speedup": 1.4881855061802785, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 241280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2752704, - "linear_dense_total": 4718592, - "linear_nnz": 2993984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 379584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2951104, - "linear_dense_total": 4718592, - "linear_nnz": 3330688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 172352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419008, - "linear_dense_total": 4718592, - "linear_nnz": 591360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 104768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388288, - "linear_dense_total": 4718592, - "linear_nnz": 493056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 322880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194240, - "linear_dense_total": 4718592, - "linear_nnz": 3517120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 565440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3155136, - "linear_dense_total": 4718592, - "linear_nnz": 3720576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 390400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3064768, - "linear_dense_total": 4718592, - "linear_nnz": 3455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 406592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2993600, - "linear_dense_total": 4718592, - "linear_nnz": 3400192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 356480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2631680, - "linear_dense_total": 4718592, - "linear_nnz": 2988160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 409920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2067776, - "linear_dense_total": 4718592, - "linear_nnz": 2477696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 242048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1370368, - "linear_dense_total": 4718592, - "linear_nnz": 1612416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 224896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 472768, - "linear_dense_total": 4718592, - "linear_nnz": 697664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29278080, - "linear_sparsity": 65.52870008680556, - "linear_total": 84934656, - "nnz": 53211146, - "total": 108893186, - "total_sparsity": 51.13454941064908 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2 - ], - "5": [ - 1, - 2, - 5, - 6, - 7 - ], - "6": [ - 0, - 2, - 3, - 7 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.69063386944181, - "f1": 88.06386432532665 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.390718185424806, - "eval_elapsed_time": 24.534384376835078 - }, - "speedup": 2.2192523962418718, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1703424, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 1291776, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2786304, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2649600, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 3124224, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2449920, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1910784, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 1122816, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23757312, - "linear_sparsity": 67.07467643051771, - "linear_total": 72155136, - "nnz": 47671853, - "total": 96101186, - "total_sparsity": 50.394105437991165 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 1, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2 - ], - "5": [ - 1, - 2, - 6, - 7 - ], - "6": [ - 0, - 2, - 3, - 7 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.23651844843897, - "f1": 87.68464122182475 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.154361824035647, - "eval_elapsed_time": 24.304617804009467 - }, - "speedup": 2.249829716853412, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1477632, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1466880, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 749568, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 2230272, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2671104, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 2241024, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 2088960, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1760256, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1973760, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 1271808, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21573120, - "linear_sparsity": 70.58269101876675, - "linear_total": 73334784, - "nnz": 45486623, - "total": 97281986, - "total_sparsity": 53.24250164876363 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.4228949858089, - "f1": 87.22907143184382 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.848762104034424, - "eval_elapsed_time": 22.048566517885774 - }, - "speedup": 2.5991656903766382, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 1198080, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1379328, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 789504, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1878528, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 2090496, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1747968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1826304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 1084416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18445824, - "linear_sparsity": 73.42200779036827, - "linear_total": 69402624, - "nnz": 42356011, - "total": 93345986, - "total_sparsity": 54.62471091151151 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 4, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 2, - 3, - 4, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.82686849574267, - "f1": 86.75497848244157 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.354346725463868, - "eval_elapsed_time": 21.489493974950165 - }, - "speedup": 2.68869031405704, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 967680, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 1041408, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 686592, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1586688, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 2013696, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1872384, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1517568, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1645056, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 1534464, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1056768, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16424448, - "linear_sparsity": 76.46786971830986, - "linear_total": 69795840, - "nnz": 40333447, - "total": 93739586, - "total_sparsity": 56.972876965767696 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 4, - 5, - 6, - 7 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 6, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.69347209082308, - "f1": 88.72194531479171 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.951393741607667, - "eval_elapsed_time": 28.213609586004168 - }, - "speedup": 1.8420919143305463, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2502144, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2268672, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 1545216, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 1468416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 3293184, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 3325440, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3480576, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2904576, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 2440704, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 1388544, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30818304, - "linear_sparsity": 59.1796875, - "linear_total": 75497472, - "nnz": 54738530, - "total": 99446786, - "total_sparsity": 44.95696422004025 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.86092715231788, - "f1": 88.26868699204444 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.458871116638186, - "eval_elapsed_time": 26.62503844080493 - }, - "speedup": 1.98338294004996, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.87038789025544, - "f1": 88.24613086360249 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.453059474945068, - "eval_elapsed_time": 26.577815205790102 - }, - "speedup": 1.9839754797994356, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.80416272469253, - "f1": 88.20260662536118 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.439563426971436, - "eval_elapsed_time": 25.7331585730426 - }, - "speedup": 2.0930209740713988, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6717123935667, - "f1": 88.128983727943 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.875869693756105, - "eval_elapsed_time": 26.023085076361895 - }, - "speedup": 2.044641843344449, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.68117313150425, - "f1": 88.11014400914335 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.42703369522095, - "eval_elapsed_time": 25.61402732366696 - }, - "speedup": 2.094444154371984, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.01892147587512, - "f1": 87.70568682399205 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.845825397491456, - "eval_elapsed_time": 23.001069764140993 - }, - "speedup": 2.4356189745395627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.70940223967354 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.838374267578125, - "eval_elapsed_time": 22.999519595876336 - }, - "speedup": 2.436764806371294, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 7, - 8, - 10, - 11 - ], - "3": [ - 2, - 3, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 7, - 8, - 9, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 6, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.63765373699148, - "f1": 86.69392512957342 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.783753513336181, - "eval_elapsed_time": 20.85535095212981 - }, - "speedup": 2.799991523936488, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 1268736, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1296384, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 1104384, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 804864, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1440768, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1709568, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1863168, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1628160, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1901568, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 923136, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17043456, - "linear_sparsity": 74.57844574780059, - "linear_total": 67043328, - "nnz": 40951962, - "total": 90984386, - "total_sparsity": 54.990121052199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-47500": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 10, - 3, - 13, - 6 - ], - "13": [ - 2, - 10, - 4, - 12 - ], - "14": [ - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 11, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.74645222327341, - "f1": 90.16320537561052 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53850735473633, - "eval_elapsed_time": 44.58338421070948 - }, - "speedup": 1.0281280670181348, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 10, - 3, - 13, - 6 - ], - "13": [ - 2, - 10, - 4, - 12 - ], - "14": [ - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 11, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.62346263008514, - "f1": 90.10843526218638 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.30008307647705, - "eval_elapsed_time": 44.469506811816245 - }, - "speedup": 1.034699920808227, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-22500": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.399243140965, - "f1": 90.84270784891945 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6732879486084, - "eval_elapsed_time": 48.981834520120174 - }, - "speedup": 0.9261182619659336, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-25000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.20056764427625, - "f1": 90.73941291394593 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.50353849792481, - "eval_elapsed_time": 49.06402187002823 - }, - "speedup": 0.929906085171529, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-27665": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.2100283822138, - "f1": 90.70141124860059 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6272840423584, - "eval_elapsed_time": 49.02150737866759 - }, - "speedup": 0.9271417507348992, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.63576158940397, - "f1": 91.0266636723574 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85157574462891, - "eval_elapsed_time": 49.32021534908563 - }, - "speedup": 0.9221729963255725, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.65468306527909, - "f1": 91.01004624462917 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85431317138672, - "eval_elapsed_time": 49.428419118281454 - }, - "speedup": 0.922112682803639, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.33680227057711, - "f1": 89.04761607630476 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.50764268493653, - "eval_elapsed_time": 44.93039320781827 - }, - "speedup": 1.0289741034797428, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 974848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1167360, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 306176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 576512, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1714176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2709504, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1875968, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2908160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1832960, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3074048, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2155520, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3335168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1942528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2851840, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2079744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2761728, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1843200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2316288, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1582080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1950720, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1435648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1757184, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 717824, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 988160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 297984, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 584704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 334848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 447488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 358400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 436224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 134144, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 214016, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 293888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 834560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1248256, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 381952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 848896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 406528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 959488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 522240, - "linear_attention_total": 4194304, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1130496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 771072, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 414720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1091584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1839104, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36684800, - "linear_sparsity": 87.85230848524306, - "linear_total": 301989888, - "nnz": 68649433, - "total": 334094338, - "total_sparsity": 79.45208128609471 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.13812677388836, - "f1": 89.03656646065757 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.54432637023926, - "eval_elapsed_time": 44.93571184715256 - }, - "speedup": 1.0279687168915141, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 989184, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1181696, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 323584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 593920, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1745920, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2741248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1902592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2934784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1782784, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3023872, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2147328, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3326976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1917952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2827264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2049024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2731008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1820672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2293760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1562624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1931264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1390592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1712128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 286720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 573440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 801792, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1215488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 863232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 405504, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 520192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1124352, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 764928, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1203200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 423936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1083392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1070080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1817600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36464640, - "linear_sparsity": 87.92521158854166, - "linear_total": 301989888, - "nnz": 68429014, - "total": 334094338, - "total_sparsity": 79.51805636406804 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.30842005676443, - "f1": 89.04987146464723 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53598588562012, - "eval_elapsed_time": 44.935436787083745 - }, - "speedup": 1.028197131226982, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1171456, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 319488, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 589824, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1754112, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2749440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1922048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2954240, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1775616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3016704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2149376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3329024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1954816, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2864128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2065408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2747392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1823744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2296832, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1558528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1927168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1356800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1678336, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 293888, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 580608, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 795648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 395264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 862208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 392192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 945152, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 523264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1127424, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 784384, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1222656, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 416768, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1816576, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36492288, - "linear_sparsity": 87.91605631510416, - "linear_total": 301989888, - "nnz": 68456822, - "total": 334094338, - "total_sparsity": 79.50973296650122 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.78429517502366, - "f1": 90.32458147221426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.496326583862306, - "eval_elapsed_time": 49.08256564009935 - }, - "speedup": 0.9300676995438012, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 783360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 1618944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1602560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1636352, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 4046848, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1575936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4086784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1203200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2030592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 4635648, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1785856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1946624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3646464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1647616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3050496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1538048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2635776, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2070528, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 607232, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1346560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1665024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 754688, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 284672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 479232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 435200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 626688, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2312192, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 369664, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2137088, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 463872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2337792, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 294912, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2349056, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 613376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2386944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 208896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2177024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 923648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2910208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56885248, - "linear_sparsity": 81.16319444444444, - "linear_total": 301989888, - "nnz": 88857851, - "total": 334094338, - "total_sparsity": 73.40336518962498 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.66130558183538, - "f1": 90.22195941338013 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.275371505737304, - "eval_elapsed_time": 48.98561626393348 - }, - "speedup": 0.9350465325310627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 766976, - "linear_attention_total": 4194304, - "linear_dense_nnz": 831488, - "linear_dense_total": 8388608, - "linear_nnz": 1598464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 338944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1273856, - "linear_dense_total": 8388608, - "linear_nnz": 1612800, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2408448, - "linear_dense_total": 8388608, - "linear_nnz": 4004864, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1615872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2508800, - "linear_dense_total": 8388608, - "linear_nnz": 4124672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1205248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2658304, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2006016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2603008, - "linear_dense_total": 8388608, - "linear_nnz": 4609024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1718272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4018176, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1935360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3635200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1612800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3015680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1502208, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2599936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1167360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2068480, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 601088, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1340416, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1357824, - "linear_dense_total": 8388608, - "linear_nnz": 1662976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 364544, - "linear_attention_total": 4194304, - "linear_dense_nnz": 356352, - "linear_dense_total": 8388608, - "linear_nnz": 720896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 274432, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 468992, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 102400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 423936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 621568, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2307072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 377856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2145280, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 460800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1871872, - "linear_dense_total": 8388608, - "linear_nnz": 2332672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 309248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2363392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 583680, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2357248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 215040, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1966080, - "linear_dense_total": 8388608, - "linear_nnz": 2181120, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 916480, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2903040, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56608768, - "linear_sparsity": 81.25474717881944, - "linear_total": 301989888, - "nnz": 88581359, - "total": 334094338, - "total_sparsity": 73.4861238504437 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.92147587511826, - "f1": 86.66302391758462 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 28.611265159606933, - "eval_elapsed_time": 36.00721236690879 - }, - "speedup": 1.3489229780673324, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 668672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 88064, - "linear_dense_total": 8388608, - "linear_nnz": 756736, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 232448, - "linear_attention_total": 4194304, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 334848, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 864256, - "linear_attention_total": 4194304, - "linear_dense_nnz": 442368, - "linear_dense_total": 8388608, - "linear_nnz": 1306624, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 985088, - "linear_attention_total": 4194304, - "linear_dense_nnz": 462848, - "linear_dense_total": 8388608, - "linear_nnz": 1447936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 726016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 557056, - "linear_dense_total": 8388608, - "linear_nnz": 1283072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1306624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 507904, - "linear_dense_total": 8388608, - "linear_nnz": 1814528, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1107968, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 1470464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1074176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 278528, - "linear_dense_total": 8388608, - "linear_nnz": 1352704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 951296, - "linear_attention_total": 4194304, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 1139712, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 795648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 984064, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 706560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 141312, - "linear_dense_total": 8388608, - "linear_nnz": 847872, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 290816, - "linear_attention_total": 4194304, - "linear_dense_nnz": 137216, - "linear_dense_total": 8388608, - "linear_nnz": 428032, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 146432, - "linear_attention_total": 4194304, - "linear_dense_nnz": 90112, - "linear_dense_total": 8388608, - "linear_nnz": 236544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 186368, - "linear_attention_total": 4194304, - "linear_dense_nnz": 57344, - "linear_dense_total": 8388608, - "linear_nnz": 243712, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 194560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 235520, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 46080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 87040, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 54272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 156672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 359424, - "linear_attention_total": 4194304, - "linear_dense_nnz": 155648, - "linear_dense_total": 8388608, - "linear_nnz": 515072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 349184, - "linear_attention_total": 4194304, - "linear_dense_nnz": 143360, - "linear_dense_total": 8388608, - "linear_nnz": 492544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 270336, - "linear_attention_total": 4194304, - "linear_dense_nnz": 167936, - "linear_dense_total": 8388608, - "linear_nnz": 438272, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 196608, - "linear_attention_total": 4194304, - "linear_dense_nnz": 212992, - "linear_dense_total": 8388608, - "linear_nnz": 409600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 494592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 178176, - "linear_dense_total": 8388608, - "linear_nnz": 672768, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 173056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 229376, - "linear_dense_total": 8388608, - "linear_nnz": 402432, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 630784, - "linear_attention_total": 4194304, - "linear_dense_nnz": 370688, - "linear_dense_total": 8388608, - "linear_nnz": 1001472, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18058240, - "linear_sparsity": 94.02025010850694, - "linear_total": 301989888, - "nnz": 50008420, - "total": 334094338, - "total_sparsity": 85.0316469595483 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-220000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.59602649006622, - "f1": 87.8561484925226 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 29.83378296661377, - "eval_elapsed_time": 37.31617963500321 - }, - "speedup": 1.2936473074353696, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 340992, - "linear_attention_total": 4194304, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 594944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 141312, - "linear_attention_total": 4194304, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 573440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 832512, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 2042880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 765952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2043904, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 720896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2121728, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1234944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 2699264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 879616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 2001920, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 917504, - "linear_attention_total": 4194304, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 1695744, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 793600, - "linear_attention_total": 4194304, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 1326080, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 726016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 1182720, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 656384, - "linear_attention_total": 4194304, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 1096704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 281600, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 644096, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 238592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 689152, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 137216, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 321536, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 175104, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 287744, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 54272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 168960, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 24576, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 320512, - "linear_attention_total": 4194304, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 869376, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 332800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 947200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 147456, - "linear_attention_total": 4194304, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 987136, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 166912, - "linear_attention_total": 4194304, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1025024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 376832, - "linear_attention_total": 4194304, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1013760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 145408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 993280, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 466944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1368064, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26903552, - "linear_sparsity": 91.09124077690971, - "linear_total": 301989888, - "nnz": 58856371, - "total": 334094338, - "total_sparsity": 82.38330785480117 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.4635761589404, - "f1": 87.71992570037945 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 29.83577773284912, - "eval_elapsed_time": 37.33651804598048 - }, - "speedup": 1.293560816511874, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 365568, - "linear_attention_total": 4194304, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 619520, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 137216, - "linear_attention_total": 4194304, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 569344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 826368, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 2036736, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 764928, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2042880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 737280, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2138112, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1224704, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 2689024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 869376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 1991680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 924672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 1702912, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 782336, - "linear_attention_total": 4194304, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 1314816, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 720896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 1177600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 655360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 1095680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 288768, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 651264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 242688, - "linear_attention_total": 4194304, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 693248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 141312, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 325632, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 173056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 285696, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 55296, - "linear_attention_total": 4194304, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 169984, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 24576, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 327680, - "linear_attention_total": 4194304, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 876544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 332800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 947200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 139264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 978944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 165888, - "linear_attention_total": 4194304, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1024000, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 381952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1018880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 134144, - "linear_attention_total": 4194304, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 982016, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 489472, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1390592, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26931200, - "linear_sparsity": 91.08208550347221, - "linear_total": 301989888, - "nnz": 58884211, - "total": 334094338, - "total_sparsity": 82.37497487910136 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - } - } -} \ No newline at end of file diff --git a/analysis/files/results/results.json b/analysis/files/results/results.json deleted file mode 100644 index 4aaa2f9c..00000000 --- a/analysis/files/results/results.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.076128184970003}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.46750119898934}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 15.94129539799178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.653043513011653}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.350134194013663}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 15.804350501974113}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.662423020985443}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.62424924300285}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.315251532010734}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.643985862960108}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 69.66989313997328, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 71.46695366402855, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.328767778992187}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.36405121401185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 17.96685794304358}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 17.98387801699573}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.518095910025295}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.52969163004309}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.021364552958403}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 10.957513606990688}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 10.933026321989018}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 20.03590824501589, "optimize_mode": "dense"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.22898046800401, "optimize_mode": "dense"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 68.15529748401605, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 81.4040583850001, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 11.901203104003798}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.474253287015017}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.766221412981395}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 32.06774970900733}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.040934944001492}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.163633761985693}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.269501545990352}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.333724958996754}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.621274546021596}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.59519276500214}}}, "base_speed_report": {"eval_elapsed_time": 38.66623001394328, "optimize_mode": "disabled"}} \ No newline at end of file diff --git a/analysis/files/results/results10.json b/analysis/files/results/results10.json deleted file mode 100644 index 2ca3586c..00000000 --- a/analysis/files/results/results10.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_test5/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 14.065016260137782}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 23.56436571292579}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 23.61654355400242}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 29.429046569159254}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 28.692298884037882}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 28.704244010150433}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 33.08102096617222}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 33.090760480146855}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 30.383016001898795}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 30.506126267835498}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 28.065979121020064}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 28.04132828908041}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 28.038834661012515}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 29.592536952113733}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 29.660654196050018}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 29.750202575000003}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 35.13715321500786}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 27.474724027095363}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.814206156879663}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 32.3695623409003}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 17.91969774197787}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.6927186998073}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.150802633957937}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.863338297931477}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.57372920983471}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.812317132018507}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.71152348187752}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.398823922965676}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.785655258921906}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 28.275199214927852}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.337535696104169}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 33.620146826142445}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 33.69571442203596}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 30.31329287402332}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 27.85703947697766}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 27.81183459307067}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 27.788447924889624}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 17.21582882408984}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 17.197634894168004}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.265181887894869}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.085542490938678}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.512992850970477}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 18.717300809919834}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.605645736912265}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 41.11115196393803}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 40.64612381509505}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 25.710868231020868}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 25.134117686888203}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 24.50548317306675}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 21.893441491993144}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 21.611296633956954}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 21.071897589135915}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 16.06849605194293}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.895570316817611}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 40.132621727185324}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.20940860803239}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.07671318203211}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.641912897117436}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.640617809956893}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 25.115268317982554}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107521026, "linear_total": 84934656, "linear_nnz": 83562496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6776832, "linear_attention_total": 2359296, "linear_attention_nnz": 2067456, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6964224, "linear_attention_total": 2359296, "linear_attention_nnz": 2279424, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7028736, "linear_attention_total": 2359296, "linear_attention_nnz": 2329600, "linear_dense_total": 4718592, "linear_dense_nnz": 4699136}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7020544, "linear_attention_total": 2359296, "linear_attention_nnz": 2313216, "linear_dense_total": 4718592, "linear_dense_nnz": 4707328}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7008256, "linear_attention_total": 2359296, "linear_attention_nnz": 2319360, "linear_dense_total": 4718592, "linear_dense_nnz": 4688896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7026688, "linear_attention_total": 2359296, "linear_attention_nnz": 2332672, "linear_dense_total": 4718592, "linear_dense_nnz": 4694016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7005184, "linear_attention_total": 2359296, "linear_attention_nnz": 2317312, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006208, "linear_attention_total": 2359296, "linear_attention_nnz": 2332672, "linear_dense_total": 4718592, "linear_dense_nnz": 4673536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6934528, "linear_attention_total": 2359296, "linear_attention_nnz": 2287616, "linear_dense_total": 4718592, "linear_dense_nnz": 4646912}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6907904, "linear_attention_total": 2359296, "linear_attention_nnz": 2265088, "linear_dense_total": 4718592, "linear_dense_nnz": 4642816}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6916096, "linear_attention_total": 2359296, "linear_attention_nnz": 2250752, "linear_dense_total": 4718592, "linear_dense_nnz": 4665344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6967296, "linear_attention_total": 2359296, "linear_attention_nnz": 2277376, "linear_dense_total": 4718592, "linear_dense_nnz": 4689920}}, "total_sparsity": 1.260097211224953, "linear_sparsity": 1.6155478395061706}, "speed": {"eval_elapsed_time": 40.19490528292954}, "opt_eval_metrics": {"exact_match": 80.17029328287606, "f1": 87.61987487435422}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.754389239940792}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 12.991677745943889}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.87139375694096}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.044030340854079}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.875461397925392}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.863983491901308}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.856388033833355}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.372085305862129}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.856825530063361}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.743963541928679}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.430293028010055}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 25.971711199032143}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.440584641881287}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 13.018812068970874}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.96851964481175}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.973318020114675}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.801363392965868}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 18.650014573941007}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 18.39338346105069}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 17.99394460907206}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 17.949384653009474}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 22.46348627889529}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.948209983995184}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 20.075127677991986}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 19.613351088017225}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 26.131227070000023}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 25.23966666101478}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 25.169638738036156}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.573690482182428}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.54654596094042}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 27.04506094707176}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 35.92588178999722}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 35.89134427602403}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 38.17887881118804}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 11.783776527037844}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 11.86458179494366}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.63978576194495}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.864284622017294}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.753634401829913}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 14.023887678980827}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 14.008215571055189}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.743516992079094}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results11.json b/analysis/files/results/results11.json deleted file mode 100644 index a3eabd4e..00000000 --- a/analysis/files/results/results11.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 15.02683336683549}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.68422868498601}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.70178026217036}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.703147856052965}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 16.04654254997149}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 16.063478752039373}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63685078, "linear_total": 84934656, "linear_nnz": 39741824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3854752, "linear_attention_total": 2359296, "linear_attention_nnz": 261808, "linear_dense_total": 4718592, "linear_dense_nnz": 3592944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4073232, "linear_attention_total": 2359296, "linear_attention_nnz": 407856, "linear_dense_total": 4718592, "linear_dense_nnz": 3665376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4218016, "linear_attention_total": 2359296, "linear_attention_nnz": 470352, "linear_dense_total": 4718592, "linear_dense_nnz": 3747664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4318192, "linear_attention_total": 2359296, "linear_attention_nnz": 586320, "linear_dense_total": 4718592, "linear_dense_nnz": 3731872}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4294272, "linear_attention_total": 2359296, "linear_attention_nnz": 598112, "linear_dense_total": 4718592, "linear_dense_nnz": 3696160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4191568, "linear_attention_total": 2359296, "linear_attention_nnz": 540976, "linear_dense_total": 4718592, "linear_dense_nnz": 3650592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4018960, "linear_attention_total": 2359296, "linear_attention_nnz": 518320, "linear_dense_total": 4718592, "linear_dense_nnz": 3500640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3692480, "linear_attention_total": 2359296, "linear_attention_nnz": 494608, "linear_dense_total": 4718592, "linear_dense_nnz": 3197872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3064736, "linear_attention_total": 2359296, "linear_attention_nnz": 381872, "linear_dense_total": 4718592, "linear_dense_nnz": 2682864}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1882688, "linear_attention_total": 2359296, "linear_attention_nnz": 281888, "linear_dense_total": 4718592, "linear_dense_nnz": 1600800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307728, "linear_attention_total": 2359296, "linear_attention_nnz": 212544, "linear_dense_total": 4718592, "linear_dense_nnz": 1095184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 825200, "linear_attention_total": 2359296, "linear_attention_nnz": 122704, "linear_dense_total": 4718592, "linear_dense_nnz": 702496}}, "total_sparsity": 41.51601184669167, "linear_sparsity": 53.208942177854944}, "speed": {"eval_elapsed_time": 33.01966134808026}, "opt_eval_metrics": {"exact_match": 80.52980132450331, "f1": 88.02284574429551}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63495382, "linear_total": 84934656, "linear_nnz": 39552208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3842976, "linear_attention_total": 2359296, "linear_attention_nnz": 258016, "linear_dense_total": 4718592, "linear_dense_nnz": 3584960}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4064144, "linear_attention_total": 2359296, "linear_attention_nnz": 404784, "linear_dense_total": 4718592, "linear_dense_nnz": 3659360}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4202080, "linear_attention_total": 2359296, "linear_attention_nnz": 460752, "linear_dense_total": 4718592, "linear_dense_nnz": 3741328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4301216, "linear_attention_total": 2359296, "linear_attention_nnz": 577184, "linear_dense_total": 4718592, "linear_dense_nnz": 3724032}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4277440, "linear_attention_total": 2359296, "linear_attention_nnz": 587792, "linear_dense_total": 4718592, "linear_dense_nnz": 3689648}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4172464, "linear_attention_total": 2359296, "linear_attention_nnz": 530480, "linear_dense_total": 4718592, "linear_dense_nnz": 3641984}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3999744, "linear_attention_total": 2359296, "linear_attention_nnz": 508336, "linear_dense_total": 4718592, "linear_dense_nnz": 3491408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3673360, "linear_attention_total": 2359296, "linear_attention_nnz": 486304, "linear_dense_total": 4718592, "linear_dense_nnz": 3187056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3043376, "linear_attention_total": 2359296, "linear_attention_nnz": 374032, "linear_dense_total": 4718592, "linear_dense_nnz": 2669344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1863968, "linear_attention_total": 2359296, "linear_attention_nnz": 276992, "linear_dense_total": 4718592, "linear_dense_nnz": 1586976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293056, "linear_attention_total": 2359296, "linear_attention_nnz": 209136, "linear_dense_total": 4718592, "linear_dense_nnz": 1083920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818384, "linear_attention_total": 2359296, "linear_attention_nnz": 120976, "linear_dense_total": 4718592, "linear_dense_nnz": 697408}}, "total_sparsity": 41.69021558428826, "linear_sparsity": 53.432191448447156}, "speed": {"eval_elapsed_time": 32.776620995020494}, "opt_eval_metrics": {"exact_match": 80.09460737937559, "f1": 87.80889686617203}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 54109530, "linear_total": 84934656, "linear_nnz": 30171936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3151120, "linear_attention_total": 2359296, "linear_attention_nnz": 172416, "linear_dense_total": 4718592, "linear_dense_nnz": 2978704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411360, "linear_attention_total": 2359296, "linear_attention_nnz": 308192, "linear_dense_total": 4718592, "linear_dense_nnz": 3103168}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3491136, "linear_attention_total": 2359296, "linear_attention_nnz": 285568, "linear_dense_total": 4718592, "linear_dense_nnz": 3205568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3603168, "linear_attention_total": 2359296, "linear_attention_nnz": 437904, "linear_dense_total": 4718592, "linear_dense_nnz": 3165264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3410880, "linear_attention_total": 2359296, "linear_attention_nnz": 321040, "linear_dense_total": 4718592, "linear_dense_nnz": 3089840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3356416, "linear_attention_total": 2359296, "linear_attention_nnz": 332784, "linear_dense_total": 4718592, "linear_dense_nnz": 3023632}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3072896, "linear_attention_total": 2359296, "linear_attention_nnz": 288464, "linear_dense_total": 4718592, "linear_dense_nnz": 2784432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2683232, "linear_attention_total": 2359296, "linear_attention_nnz": 328464, "linear_dense_total": 4718592, "linear_dense_nnz": 2354768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1957200, "linear_attention_total": 2359296, "linear_attention_nnz": 204832, "linear_dense_total": 4718592, "linear_dense_nnz": 1752368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 905552, "linear_attention_total": 2359296, "linear_attention_nnz": 189616, "linear_dense_total": 4718592, "linear_dense_nnz": 715936}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667120, "linear_attention_total": 2359296, "linear_attention_nnz": 140384, "linear_dense_total": 4718592, "linear_dense_nnz": 526736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461856, "linear_attention_total": 2359296, "linear_attention_nnz": 84608, "linear_dense_total": 4718592, "linear_dense_nnz": 377248}}, "total_sparsity": 50.30953543778212, "linear_sparsity": 64.47629575376158}, "speed": {"eval_elapsed_time": 29.022300366079435}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.31499809166372}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53891686, "linear_total": 84934656, "linear_nnz": 29954112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3130496, "linear_attention_total": 2359296, "linear_attention_nnz": 169136, "linear_dense_total": 4718592, "linear_dense_nnz": 2961360}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3393488, "linear_attention_total": 2359296, "linear_attention_nnz": 304464, "linear_dense_total": 4718592, "linear_dense_nnz": 3089024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3470880, "linear_attention_total": 2359296, "linear_attention_nnz": 279216, "linear_dense_total": 4718592, "linear_dense_nnz": 3191664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3580464, "linear_attention_total": 2359296, "linear_attention_nnz": 429728, "linear_dense_total": 4718592, "linear_dense_nnz": 3150736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3390736, "linear_attention_total": 2359296, "linear_attention_nnz": 314688, "linear_dense_total": 4718592, "linear_dense_nnz": 3076048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3334432, "linear_attention_total": 2359296, "linear_attention_nnz": 326416, "linear_dense_total": 4718592, "linear_dense_nnz": 3008016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3048464, "linear_attention_total": 2359296, "linear_attention_nnz": 281984, "linear_dense_total": 4718592, "linear_dense_nnz": 2766480}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2658992, "linear_attention_total": 2359296, "linear_attention_nnz": 320352, "linear_dense_total": 4718592, "linear_dense_nnz": 2338640}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936656, "linear_attention_total": 2359296, "linear_attention_nnz": 200608, "linear_dense_total": 4718592, "linear_dense_nnz": 1736048}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 892160, "linear_attention_total": 2359296, "linear_attention_nnz": 185008, "linear_dense_total": 4718592, "linear_dense_nnz": 707152}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660320, "linear_attention_total": 2359296, "linear_attention_nnz": 137920, "linear_dense_total": 4718592, "linear_dense_nnz": 522400}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457024, "linear_attention_total": 2359296, "linear_attention_nnz": 82480, "linear_dense_total": 4718592, "linear_dense_nnz": 374544}}, "total_sparsity": 50.50958835936713, "linear_sparsity": 64.7327564380787}, "speed": {"eval_elapsed_time": 29.01672533689998}, "opt_eval_metrics": {"exact_match": 79.06338694418164, "f1": 86.86293366416082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 23.56436571292579}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 23.61654355400242}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 48926434, "linear_total": 84934656, "linear_nnz": 25008128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2018816, "linear_attention_total": 2359296, "linear_attention_nnz": 733184, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2776064, "linear_attention_total": 2359296, "linear_attention_nnz": 1252352, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993664, "linear_attention_total": 2359296, "linear_attention_nnz": 1437696, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3162624, "linear_attention_total": 2359296, "linear_attention_nnz": 1545216, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3089408, "linear_attention_total": 2359296, "linear_attention_nnz": 1574912, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2505216, "linear_attention_total": 2359296, "linear_attention_nnz": 1370112, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026496, "linear_attention_total": 2359296, "linear_attention_nnz": 1178624, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665536, "linear_attention_total": 2359296, "linear_attention_nnz": 1190912, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 957440, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 805888, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904192, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 55.069333723048565, "linear_sparsity": 70.55603780864197}, "speed": {"eval_elapsed_time": 21.182856186991557}, "opt_eval_metrics": {"exact_match": 80.79470198675497, "f1": 88.10958975740277}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 48725622, "linear_total": 84934656, "linear_nnz": 24807424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098688, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2731008, "linear_attention_total": 2359296, "linear_attention_nnz": 1225728, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2977280, "linear_attention_total": 2359296, "linear_attention_nnz": 1433600, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3176448, "linear_attention_total": 2359296, "linear_attention_nnz": 1566720, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3081216, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2487808, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 1166336, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1617408, "linear_attention_total": 2359296, "linear_attention_nnz": 1148928, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 945664, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 787456, "linear_attention_total": 2359296, "linear_attention_nnz": 624640, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897536, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.25374562922606, "linear_sparsity": 70.79234182098766}, "speed": {"eval_elapsed_time": 21.17357637709938}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 88.07285498416482}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 48790134, "linear_total": 84934656, "linear_nnz": 24871936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2086400, "linear_attention_total": 2359296, "linear_attention_nnz": 978944, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1995264, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2738176, "linear_attention_total": 2359296, "linear_attention_nnz": 1232896, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2998784, "linear_attention_total": 2359296, "linear_attention_nnz": 1455104, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3208192, "linear_attention_total": 2359296, "linear_attention_nnz": 1598464, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3104768, "linear_attention_total": 2359296, "linear_attention_nnz": 1596416, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 1373184, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2002432, "linear_attention_total": 2359296, "linear_attention_nnz": 1165312, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1631744, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 615424, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 881152, "linear_attention_total": 2359296, "linear_attention_nnz": 403456, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.19450225287742, "linear_sparsity": 70.71638695987654}, "speed": {"eval_elapsed_time": 21.167539164889604}, "opt_eval_metrics": {"exact_match": 80.70009460737937, "f1": 88.04831949879843}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72930262, "linear_total": 84934656, "linear_nnz": 48982384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408688, "linear_attention_total": 2359296, "linear_attention_nnz": 428592, "linear_dense_total": 4718592, "linear_dense_nnz": 3980096}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4561328, "linear_attention_total": 2359296, "linear_attention_nnz": 545744, "linear_dense_total": 4718592, "linear_dense_nnz": 4015584}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4791104, "linear_attention_total": 2359296, "linear_attention_nnz": 729664, "linear_dense_total": 4718592, "linear_dense_nnz": 4061440}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4914112, "linear_attention_total": 2359296, "linear_attention_nnz": 851472, "linear_dense_total": 4718592, "linear_dense_nnz": 4062640}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5008736, "linear_attention_total": 2359296, "linear_attention_nnz": 960992, "linear_dense_total": 4718592, "linear_dense_nnz": 4047744}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4908864, "linear_attention_total": 2359296, "linear_attention_nnz": 902768, "linear_dense_total": 4718592, "linear_dense_nnz": 4006096}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4781792, "linear_attention_total": 2359296, "linear_attention_nnz": 861120, "linear_dense_total": 4718592, "linear_dense_nnz": 3920672}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4492512, "linear_attention_total": 2359296, "linear_attention_nnz": 759664, "linear_dense_total": 4718592, "linear_dense_nnz": 3732848}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4061488, "linear_attention_total": 2359296, "linear_attention_nnz": 670096, "linear_dense_total": 4718592, "linear_dense_nnz": 3391392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3105840, "linear_attention_total": 2359296, "linear_attention_nnz": 444064, "linear_dense_total": 4718592, "linear_dense_nnz": 2661776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422000, "linear_attention_total": 2359296, "linear_attention_nnz": 329968, "linear_dense_total": 4718592, "linear_dense_nnz": 2092032}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525920, "linear_attention_total": 2359296, "linear_attention_nnz": 190816, "linear_dense_total": 4718592, "linear_dense_nnz": 1335104}}, "total_sparsity": 33.025871793300276, "linear_sparsity": 42.329331386236504}, "speed": {"eval_elapsed_time": 35.11626772303134}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.58172107792693}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53211146, "linear_total": 84934656, "linear_nnz": 29278080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993984, "linear_attention_total": 2359296, "linear_attention_nnz": 241280, "linear_dense_total": 4718592, "linear_dense_nnz": 2752704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3330688, "linear_attention_total": 2359296, "linear_attention_nnz": 379584, "linear_dense_total": 4718592, "linear_dense_nnz": 2951104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3517120, "linear_attention_total": 2359296, "linear_attention_nnz": 322880, "linear_dense_total": 4718592, "linear_dense_nnz": 3194240}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720576, "linear_attention_total": 2359296, "linear_attention_nnz": 565440, "linear_dense_total": 4718592, "linear_dense_nnz": 3155136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3455168, "linear_attention_total": 2359296, "linear_attention_nnz": 390400, "linear_dense_total": 4718592, "linear_dense_nnz": 3064768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3400192, "linear_attention_total": 2359296, "linear_attention_nnz": 406592, "linear_dense_total": 4718592, "linear_dense_nnz": 2993600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2988160, "linear_attention_total": 2359296, "linear_attention_nnz": 356480, "linear_dense_total": 4718592, "linear_dense_nnz": 2631680}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2477696, "linear_attention_total": 2359296, "linear_attention_nnz": 409920, "linear_dense_total": 4718592, "linear_dense_nnz": 2067776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1612416, "linear_attention_total": 2359296, "linear_attention_nnz": 242048, "linear_dense_total": 4718592, "linear_dense_nnz": 1370368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697664, "linear_attention_total": 2359296, "linear_attention_nnz": 224896, "linear_dense_total": 4718592, "linear_dense_nnz": 472768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591360, "linear_attention_total": 2359296, "linear_attention_nnz": 172352, "linear_dense_total": 4718592, "linear_dense_nnz": 419008}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 493056, "linear_attention_total": 2359296, "linear_attention_nnz": 104768, "linear_dense_total": 4718592, "linear_dense_nnz": 388288}}, "total_sparsity": 51.13454941064908, "linear_sparsity": 65.52870008680556}, "speed": {"eval_elapsed_time": 26.50232954812236}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.75922108224064}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 29.429046569159254}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 28.692298884037882}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 28.704244010150433}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 33.08102096617222}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 33.090760480146855}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63788226, "linear_total": 84934656, "linear_nnz": 39853312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3867392, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 3448576}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250112, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 3696384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519424, "linear_attention_total": 2359296, "linear_attention_nnz": 562432, "linear_dense_total": 4718592, "linear_dense_nnz": 3956992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4809728, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 3982336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4702976, "linear_attention_total": 2359296, "linear_attention_nnz": 790016, "linear_dense_total": 4718592, "linear_dense_nnz": 3912960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4601344, "linear_attention_total": 2359296, "linear_attention_nnz": 701696, "linear_dense_total": 4718592, "linear_dense_nnz": 3899648}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 667392, "linear_dense_total": 4718592, "linear_dense_nnz": 3576064}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3669248, "linear_attention_total": 2359296, "linear_attention_nnz": 700416, "linear_dense_total": 4718592, "linear_dense_nnz": 2968832}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2404096, "linear_attention_total": 2359296, "linear_attention_nnz": 437504, "linear_dense_total": 4718592, "linear_dense_nnz": 1966592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1054976, "linear_attention_total": 2359296, "linear_attention_nnz": 361472, "linear_dense_total": 4718592, "linear_dense_nnz": 693504}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946176, "linear_attention_total": 2359296, "linear_attention_nnz": 291584, "linear_dense_total": 4718592, "linear_dense_nnz": 654592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 168960, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 41.42128782970864, "linear_sparsity": 53.077678915895056}, "speed": {"eval_elapsed_time": 26.95584986009635}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.51569063636161}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 63672482, "linear_total": 84934656, "linear_nnz": 39737600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3861248, "linear_attention_total": 2359296, "linear_attention_nnz": 416256, "linear_dense_total": 4718592, "linear_dense_nnz": 3444992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250368, "linear_attention_total": 2359296, "linear_attention_nnz": 541952, "linear_dense_total": 4718592, "linear_dense_nnz": 3708416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4532736, "linear_attention_total": 2359296, "linear_attention_nnz": 555520, "linear_dense_total": 4718592, "linear_dense_nnz": 3977216}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822784, "linear_attention_total": 2359296, "linear_attention_nnz": 802816, "linear_dense_total": 4718592, "linear_dense_nnz": 4019968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4714240, "linear_attention_total": 2359296, "linear_attention_nnz": 774400, "linear_dense_total": 4718592, "linear_dense_nnz": 3939840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4584192, "linear_attention_total": 2359296, "linear_attention_nnz": 686592, "linear_dense_total": 4718592, "linear_dense_nnz": 3897600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4225024, "linear_attention_total": 2359296, "linear_attention_nnz": 656384, "linear_dense_total": 4718592, "linear_dense_nnz": 3568640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3634176, "linear_attention_total": 2359296, "linear_attention_nnz": 676864, "linear_dense_total": 4718592, "linear_dense_nnz": 2957312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 432640, "linear_dense_total": 4718592, "linear_dense_nnz": 1931264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030400, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933120, "linear_attention_total": 2359296, "linear_attention_nnz": 285184, "linear_dense_total": 4718592, "linear_dense_nnz": 647936}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785408, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 611328}}, "total_sparsity": 41.52757914531035, "linear_sparsity": 53.213915412808646}, "speed": {"eval_elapsed_time": 26.93265108484775}, "opt_eval_metrics": {"exact_match": 79.55534531693472, "f1": 87.439750439335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63651698, "linear_total": 84934656, "linear_nnz": 39716864, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3864832, "linear_attention_total": 2359296, "linear_attention_nnz": 417024, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4246016, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 3703296}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4537600, "linear_attention_total": 2359296, "linear_attention_nnz": 555776, "linear_dense_total": 4718592, "linear_dense_nnz": 3981824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4824576, "linear_attention_total": 2359296, "linear_attention_nnz": 810240, "linear_dense_total": 4718592, "linear_dense_nnz": 4014336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4704768, "linear_attention_total": 2359296, "linear_attention_nnz": 764160, "linear_dense_total": 4718592, "linear_dense_nnz": 3940608}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4590080, "linear_attention_total": 2359296, "linear_attention_nnz": 685824, "linear_dense_total": 4718592, "linear_dense_nnz": 3904256}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219136, "linear_attention_total": 2359296, "linear_attention_nnz": 647680, "linear_dense_total": 4718592, "linear_dense_nnz": 3571456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640576, "linear_attention_total": 2359296, "linear_attention_nnz": 684288, "linear_dense_total": 4718592, "linear_dense_nnz": 2956288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2360064, "linear_attention_total": 2359296, "linear_attention_nnz": 427264, "linear_dense_total": 4718592, "linear_dense_nnz": 1932800}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033472, "linear_attention_total": 2359296, "linear_attention_nnz": 350976, "linear_dense_total": 4718592, "linear_dense_nnz": 682496}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 920832, "linear_attention_total": 2359296, "linear_attention_nnz": 273408, "linear_dense_total": 4718592, "linear_dense_nnz": 647424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 774912, "linear_attention_total": 2359296, "linear_attention_nnz": 166400, "linear_dense_total": 4718592, "linear_dense_nnz": 608512}}, "total_sparsity": 41.546665739029805, "linear_sparsity": 53.238329475308646}, "speed": {"eval_elapsed_time": 26.93877486907877}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.29496050765553}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 38271273, "linear_total": 84934656, "linear_nnz": 14360064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 991744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 965120, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 216576}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487360, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 1389568, "linear_dense_total": 4718592, "linear_dense_nnz": 377856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 1449984, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1713664, "linear_attention_total": 2359296, "linear_attention_nnz": 1349632, "linear_dense_total": 4718592, "linear_dense_nnz": 364032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1481216, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1190400, "linear_attention_total": 2359296, "linear_attention_nnz": 964608, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191424, "linear_attention_total": 2359296, "linear_attention_nnz": 1063936, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708608, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556544, "linear_attention_total": 2359296, "linear_attention_nnz": 502784, "linear_dense_total": 4718592, "linear_dense_nnz": 53760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 441856, "linear_attention_total": 2359296, "linear_attention_nnz": 360448, "linear_dense_total": 4718592, "linear_dense_nnz": 81408}}, "total_sparsity": 64.85429951512302, "linear_sparsity": 83.0928096064815}, "speed": {"eval_elapsed_time": 17.621023153187707}, "opt_eval_metrics": {"exact_match": 78.67549668874172, "f1": 86.51098653495667}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 30.383016001898795}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 30.506126267835498}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 28.065979121020064}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 28.04132828908041}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 28.038834661012515}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 29.592536952113733}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 29.660654196050018}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 29.750202575000003}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36724619, "linear_total": 84934656, "linear_nnz": 12816896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933888, "linear_attention_total": 2359296, "linear_attention_nnz": 522240, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1116160, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1374720, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692160, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1659392, "linear_attention_total": 2359296, "linear_attention_nnz": 825344, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1416192, "linear_attention_total": 2359296, "linear_attention_nnz": 672768, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1207296, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1235456, "linear_attention_total": 2359296, "linear_attention_nnz": 785408, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 514048, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455168, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 421888, "linear_attention_total": 2359296, "linear_attention_nnz": 222208, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.2746399944621, "linear_sparsity": 84.9096981095679}, "speed": {"eval_elapsed_time": 15.043476368067786}, "opt_eval_metrics": {"exact_match": 78.3349101229896, "f1": 86.4116267700138}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 36711275, "linear_total": 84934656, "linear_nnz": 12803584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 930816, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 536576, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1366528, "linear_attention_total": 2359296, "linear_attention_nnz": 667648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1681920, "linear_attention_total": 2359296, "linear_attention_nnz": 967680, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1412096, "linear_attention_total": 2359296, "linear_attention_nnz": 668672, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1221632, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1237504, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757760, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443904, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 425984, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.28689420474849, "linear_sparsity": 84.92537133487654}, "speed": {"eval_elapsed_time": 15.047897994983941}, "opt_eval_metrics": {"exact_match": 78.37275307473983, "f1": 86.39441106336629}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72403618, "linear_total": 84934656, "linear_nnz": 48458624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451008, "linear_attention_total": 2359296, "linear_attention_nnz": 446336, "linear_dense_total": 4718592, "linear_dense_nnz": 4004672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674880, "linear_attention_total": 2359296, "linear_attention_nnz": 597248, "linear_dense_total": 4718592, "linear_dense_nnz": 4077632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4954368, "linear_attention_total": 2359296, "linear_attention_nnz": 799296, "linear_dense_total": 4718592, "linear_dense_nnz": 4155072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5115648, "linear_attention_total": 2359296, "linear_attention_nnz": 950208, "linear_dense_total": 4718592, "linear_dense_nnz": 4165440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5174848, "linear_attention_total": 2359296, "linear_attention_nnz": 1022400, "linear_dense_total": 4718592, "linear_dense_nnz": 4152448}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5020992, "linear_attention_total": 2359296, "linear_attention_nnz": 914368, "linear_dense_total": 4718592, "linear_dense_nnz": 4106624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4879296, "linear_attention_total": 2359296, "linear_attention_nnz": 918208, "linear_dense_total": 4718592, "linear_dense_nnz": 3961088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4517696, "linear_attention_total": 2359296, "linear_attention_nnz": 832704, "linear_dense_total": 4718592, "linear_dense_nnz": 3684992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3922688, "linear_attention_total": 2359296, "linear_attention_nnz": 715648, "linear_dense_total": 4718592, "linear_dense_nnz": 3207040}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584512, "linear_attention_total": 2359296, "linear_attention_nnz": 467072, "linear_dense_total": 4718592, "linear_dense_nnz": 2117440}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879872, "linear_attention_total": 2359296, "linear_attention_nnz": 362688, "linear_dense_total": 4718592, "linear_dense_nnz": 1517184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282816, "linear_attention_total": 2359296, "linear_attention_nnz": 218432, "linear_dense_total": 4718592, "linear_dense_nnz": 1064384}}, "total_sparsity": 33.50950536060172, "linear_sparsity": 42.94599368248457}, "speed": {"eval_elapsed_time": 32.87611435819417}, "opt_eval_metrics": {"exact_match": 81.10690633869442, "f1": 88.3744311515211}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72395170, "linear_total": 84934656, "linear_nnz": 48450176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4450944, "linear_attention_total": 2359296, "linear_attention_nnz": 446080, "linear_dense_total": 4718592, "linear_dense_nnz": 4004864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674240, "linear_attention_total": 2359296, "linear_attention_nnz": 597312, "linear_dense_total": 4718592, "linear_dense_nnz": 4076928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4955648, "linear_attention_total": 2359296, "linear_attention_nnz": 800192, "linear_dense_total": 4718592, "linear_dense_nnz": 4155456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5114624, "linear_attention_total": 2359296, "linear_attention_nnz": 948864, "linear_dense_total": 4718592, "linear_dense_nnz": 4165760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5171840, "linear_attention_total": 2359296, "linear_attention_nnz": 1019200, "linear_dense_total": 4718592, "linear_dense_nnz": 4152640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5023808, "linear_attention_total": 2359296, "linear_attention_nnz": 915392, "linear_dense_total": 4718592, "linear_dense_nnz": 4108416}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4876544, "linear_attention_total": 2359296, "linear_attention_nnz": 916160, "linear_dense_total": 4718592, "linear_dense_nnz": 3960384}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519232, "linear_attention_total": 2359296, "linear_attention_nnz": 834176, "linear_dense_total": 4718592, "linear_dense_nnz": 3685056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3921792, "linear_attention_total": 2359296, "linear_attention_nnz": 713856, "linear_dense_total": 4718592, "linear_dense_nnz": 3207936}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2581056, "linear_attention_total": 2359296, "linear_attention_nnz": 465600, "linear_dense_total": 4718592, "linear_dense_nnz": 2115456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879424, "linear_attention_total": 2359296, "linear_attention_nnz": 362048, "linear_dense_total": 4718592, "linear_dense_nnz": 1517376}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 217216, "linear_dense_total": 4718592, "linear_dense_nnz": 1063808}}, "total_sparsity": 33.51726342179023, "linear_sparsity": 42.95594015239198}, "speed": {"eval_elapsed_time": 32.87603668309748}, "opt_eval_metrics": {"exact_match": 80.82308420056765, "f1": 88.21300800880684}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 44702229, "linear_total": 84934656, "linear_nnz": 20786688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1582592, "linear_attention_total": 2359296, "linear_attention_nnz": 1055744, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190848, "linear_attention_total": 2359296, "linear_attention_nnz": 1316864, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2420736, "linear_attention_total": 2359296, "linear_attention_nnz": 1468416, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2697728, "linear_attention_total": 2359296, "linear_attention_nnz": 1651712, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2603008, "linear_attention_total": 2359296, "linear_attention_nnz": 1616896, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102272, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 1265664, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 1212416, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863232, "linear_attention_total": 2359296, "linear_attention_nnz": 749568, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 652288, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682496, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 58.94855257518133, "linear_sparsity": 75.52625868055556}, "speed": {"eval_elapsed_time": 19.962007428053766}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 88.06903108265608}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72878482, "linear_total": 84934656, "linear_nnz": 48937216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4586496, "linear_attention_total": 2359296, "linear_attention_nnz": 517888, "linear_dense_total": 4718592, "linear_dense_nnz": 4068608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4844288, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 4202752}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5155328, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 4313856}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5409024, "linear_attention_total": 2359296, "linear_attention_nnz": 1072896, "linear_dense_total": 4718592, "linear_dense_nnz": 4336128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5385984, "linear_attention_total": 2359296, "linear_attention_nnz": 1068800, "linear_dense_total": 4718592, "linear_dense_nnz": 4317184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5272832, "linear_attention_total": 2359296, "linear_attention_nnz": 961792, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5128448, "linear_attention_total": 2359296, "linear_attention_nnz": 986880, "linear_dense_total": 4718592, "linear_dense_nnz": 4141568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4725504, "linear_attention_total": 2359296, "linear_attention_nnz": 905472, "linear_dense_total": 4718592, "linear_dense_nnz": 3820032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3841792, "linear_attention_total": 2359296, "linear_attention_nnz": 756224, "linear_dense_total": 4718592, "linear_dense_nnz": 3085568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879808, "linear_attention_total": 2359296, "linear_attention_nnz": 463360, "linear_dense_total": 4718592, "linear_dense_nnz": 1416448}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 415488, "linear_dense_total": 4718592, "linear_dense_nnz": 1090304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1201920, "linear_attention_total": 2359296, "linear_attention_nnz": 254720, "linear_dense_total": 4718592, "linear_dense_nnz": 947200}}, "total_sparsity": 33.07342297799975, "linear_sparsity": 42.38251109182099}, "speed": {"eval_elapsed_time": 30.725059562828392}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.34112193061533}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34772839, "linear_total": 84934656, "linear_nnz": 10866176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732160, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835072, "linear_attention_total": 2359296, "linear_attention_nnz": 535552, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1128960, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1551872, "linear_attention_total": 2359296, "linear_attention_nnz": 1111040, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1389056, "linear_attention_total": 2359296, "linear_attention_nnz": 892928, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1096704, "linear_attention_total": 2359296, "linear_attention_nnz": 663552, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1000448, "linear_attention_total": 2359296, "linear_attention_nnz": 662528, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 801792, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 803328, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 498688, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 422912, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 336384, "linear_attention_total": 2359296, "linear_attention_nnz": 239616, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 68.06702028169144, "linear_sparsity": 87.20642843364197}, "speed": {"eval_elapsed_time": 14.562878740951419}, "opt_eval_metrics": {"exact_match": 77.8240302743614, "f1": 86.11992485005756}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53223538, "linear_total": 84934656, "linear_nnz": 29295872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2685696, "linear_attention_total": 2359296, "linear_attention_nnz": 331008, "linear_dense_total": 4718592, "linear_dense_nnz": 2354688}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3258624, "linear_attention_total": 2359296, "linear_attention_nnz": 432384, "linear_dense_total": 4718592, "linear_dense_nnz": 2826240}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3726080, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 3302144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3917568, "linear_attention_total": 2359296, "linear_attention_nnz": 669440, "linear_dense_total": 4718592, "linear_dense_nnz": 3248128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3647232, "linear_attention_total": 2359296, "linear_attention_nnz": 453632, "linear_dense_total": 4718592, "linear_dense_nnz": 3193600}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3593472, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 3119616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2939648, "linear_attention_total": 2359296, "linear_attention_nnz": 445952, "linear_dense_total": 4718592, "linear_dense_nnz": 2493696}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2381824, "linear_attention_total": 2359296, "linear_attention_nnz": 490752, "linear_dense_total": 4718592, "linear_dense_nnz": 1891072}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384448, "linear_attention_total": 2359296, "linear_attention_nnz": 275712, "linear_dense_total": 4718592, "linear_dense_nnz": 1108736}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 348928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 618752, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 415744}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 535296, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 423168}}, "total_sparsity": 51.12316945157615, "linear_sparsity": 65.5077522183642}, "speed": {"eval_elapsed_time": 23.845138414064422}, "opt_eval_metrics": {"exact_match": 78.11731315042573, "f1": 86.14927876930865}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 35.13715321500786}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34069864, "linear_total": 84934656, "linear_nnz": 10163200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 674816, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1137664, "linear_attention_total": 2359296, "linear_attention_nnz": 937984, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1458176, "linear_attention_total": 2359296, "linear_attention_nnz": 1193984, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1335808, "linear_attention_total": 2359296, "linear_attention_nnz": 1057792, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 843264, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 948736, "linear_attention_total": 2359296, "linear_attention_nnz": 759808, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 979456, "linear_attention_total": 2359296, "linear_attention_nnz": 830464, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833536, "linear_attention_total": 2359296, "linear_attention_nnz": 753664, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 478208, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 46080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 432128, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 36864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290816, "linear_attention_total": 2359296, "linear_attention_nnz": 238592, "linear_dense_total": 4718592, "linear_dense_nnz": 52224}}, "total_sparsity": 68.71258409134985, "linear_sparsity": 88.03409529320987}, "speed": {"eval_elapsed_time": 15.140548604074866}, "opt_eval_metrics": {"exact_match": 76.9914853358562, "f1": 85.26341062121247}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 62877338, "linear_total": 84934656, "linear_nnz": 38938240, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3827456, "linear_attention_total": 2359296, "linear_attention_nnz": 326336, "linear_dense_total": 4718592, "linear_dense_nnz": 3501120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4141120, "linear_attention_total": 2359296, "linear_attention_nnz": 487552, "linear_dense_total": 4718592, "linear_dense_nnz": 3653568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4289088, "linear_attention_total": 2359296, "linear_attention_nnz": 487616, "linear_dense_total": 4718592, "linear_dense_nnz": 3801472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4512896, "linear_attention_total": 2359296, "linear_attention_nnz": 712832, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4390144, "linear_attention_total": 2359296, "linear_attention_nnz": 646272, "linear_dense_total": 4718592, "linear_dense_nnz": 3743872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4316928, "linear_attention_total": 2359296, "linear_attention_nnz": 625600, "linear_dense_total": 4718592, "linear_dense_nnz": 3691328}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4036864, "linear_attention_total": 2359296, "linear_attention_nnz": 575808, "linear_dense_total": 4718592, "linear_dense_nnz": 3461056}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592320, "linear_attention_total": 2359296, "linear_attention_nnz": 579392, "linear_dense_total": 4718592, "linear_dense_nnz": 3012928}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2753408, "linear_attention_total": 2359296, "linear_attention_nnz": 405632, "linear_dense_total": 4718592, "linear_dense_nnz": 2347776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1318784, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 1001344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 994816, "linear_attention_total": 2359296, "linear_attention_nnz": 238208, "linear_dense_total": 4718592, "linear_dense_nnz": 756608}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764416, "linear_attention_total": 2359296, "linear_attention_nnz": 141568, "linear_dense_total": 4718592, "linear_dense_nnz": 622848}}, "total_sparsity": 42.257784614732465, "linear_sparsity": 54.1550624517747}, "speed": {"eval_elapsed_time": 29.41211991594173}, "opt_eval_metrics": {"exact_match": 80.05676442762535, "f1": 87.66615713942541}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42128141, "linear_total": 84934656, "linear_nnz": 18215424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277440, "linear_attention_total": 2359296, "linear_attention_nnz": 643072, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539584, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068480, "linear_attention_total": 2359296, "linear_attention_nnz": 1051648, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2334208, "linear_attention_total": 2359296, "linear_attention_nnz": 1257472, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2473984, "linear_attention_total": 2359296, "linear_attention_nnz": 1315840, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2078208, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1820160, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1555456, "linear_attention_total": 2359296, "linear_attention_nnz": 925696, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 663040, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576000, "linear_attention_total": 2359296, "linear_attention_nnz": 463872, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 61.31241765669342, "linear_sparsity": 78.55360243055556}, "speed": {"eval_elapsed_time": 17.672173040919006}, "opt_eval_metrics": {"exact_match": 79.66887417218543, "f1": 87.3881230572442}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39533983, "linear_total": 84934656, "linear_nnz": 15622656, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107968, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 720896, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717760, "linear_attention_total": 2359296, "linear_attention_nnz": 1098752, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967104, "linear_attention_total": 2359296, "linear_attention_nnz": 1309696, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2067968, "linear_attention_total": 2359296, "linear_attention_nnz": 1362944, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742336, "linear_attention_total": 2359296, "linear_attention_nnz": 1074176, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565696, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1342464, "linear_attention_total": 2359296, "linear_attention_nnz": 958464, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1153536, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729088, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 551936, "linear_attention_total": 2359296, "linear_attention_nnz": 478208, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 63.694713643514845, "linear_sparsity": 81.6062644675926}, "speed": {"eval_elapsed_time": 17.396596929989755}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.14755939306319}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42626625, "linear_total": 84934656, "linear_nnz": 18712064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1420800, "linear_attention_total": 2359296, "linear_attention_nnz": 1210368, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1381888, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013184, "linear_attention_total": 2359296, "linear_attention_nnz": 1500160, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1526784, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395136, "linear_attention_total": 2359296, "linear_attention_nnz": 1734656, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211328, "linear_attention_total": 2359296, "linear_attention_nnz": 1659904, "linear_dense_total": 4718592, "linear_dense_nnz": 551424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943040, "linear_attention_total": 2359296, "linear_attention_nnz": 1486848, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590784, "linear_attention_total": 2359296, "linear_attention_nnz": 1254400, "linear_dense_total": 4718592, "linear_dense_nnz": 336384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1441280, "linear_attention_total": 2359296, "linear_attention_nnz": 1267712, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 837632, "linear_attention_total": 2359296, "linear_attention_nnz": 760832, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 712704, "linear_dense_total": 4718592, "linear_dense_nnz": 69120}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 580096, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 136704}}, "total_sparsity": 60.85464429335368, "linear_sparsity": 77.96887056327161}, "speed": {"eval_elapsed_time": 19.82656983099878}, "opt_eval_metrics": {"exact_match": 80.10406811731315, "f1": 87.56487698206614}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 27.474724027095363}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.814206156879663}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 32.3695623409003}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 17.91969774197787}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.6927186998073}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.150802633957937}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.863338297931477}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.57372920983471}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.812317132018507}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.71152348187752}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.398823922965676}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.785655258921906}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 28.275199214927852}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.337535696104169}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 33.620146826142445}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 33.69571442203596}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 30.31329287402332}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 27.85703947697766}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 27.81183459307067}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 27.788447924889624}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 17.21582882408984}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 17.197634894168004}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.265181887894869}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.085542490938678}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.512992850970477}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 18.717300809919834}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.605645736912265}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 41.11115196393803}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 40.64612381509505}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 25.710868231020868}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 25.134117686888203}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 24.50548317306675}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 21.893441491993144}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 21.611296633956954}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 21.071897589135915}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 16.06849605194293}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.895570316817611}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 40.132621727185324}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.20940860803239}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.07671318203211}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.641912897117436}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.640617809956893}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 25.115268317982554}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107521026, "linear_total": 84934656, "linear_nnz": 83562496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6776832, "linear_attention_total": 2359296, "linear_attention_nnz": 2067456, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6964224, "linear_attention_total": 2359296, "linear_attention_nnz": 2279424, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7028736, "linear_attention_total": 2359296, "linear_attention_nnz": 2329600, "linear_dense_total": 4718592, "linear_dense_nnz": 4699136}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7020544, "linear_attention_total": 2359296, "linear_attention_nnz": 2313216, "linear_dense_total": 4718592, "linear_dense_nnz": 4707328}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7008256, "linear_attention_total": 2359296, "linear_attention_nnz": 2319360, "linear_dense_total": 4718592, "linear_dense_nnz": 4688896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7026688, "linear_attention_total": 2359296, "linear_attention_nnz": 2332672, "linear_dense_total": 4718592, "linear_dense_nnz": 4694016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7005184, "linear_attention_total": 2359296, "linear_attention_nnz": 2317312, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006208, "linear_attention_total": 2359296, "linear_attention_nnz": 2332672, "linear_dense_total": 4718592, "linear_dense_nnz": 4673536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6934528, "linear_attention_total": 2359296, "linear_attention_nnz": 2287616, "linear_dense_total": 4718592, "linear_dense_nnz": 4646912}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6907904, "linear_attention_total": 2359296, "linear_attention_nnz": 2265088, "linear_dense_total": 4718592, "linear_dense_nnz": 4642816}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6916096, "linear_attention_total": 2359296, "linear_attention_nnz": 2250752, "linear_dense_total": 4718592, "linear_dense_nnz": 4665344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6967296, "linear_attention_total": 2359296, "linear_attention_nnz": 2277376, "linear_dense_total": 4718592, "linear_dense_nnz": 4689920}}, "total_sparsity": 1.260097211224953, "linear_sparsity": 1.6155478395061706}, "speed": {"eval_elapsed_time": 40.19490528292954}, "opt_eval_metrics": {"exact_match": 80.17029328287606, "f1": 87.61987487435422}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.754389239940792}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 12.991677745943889}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.87139375694096}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.044030340854079}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.875461397925392}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.863983491901308}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.856388033833355}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.372085305862129}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.856825530063361}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.743963541928679}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.430293028010055}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 25.971711199032143}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.440584641881287}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 13.018812068970874}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.96851964481175}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.973318020114675}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.801363392965868}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 18.650014573941007}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 18.39338346105069}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 17.99394460907206}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 17.949384653009474}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 22.46348627889529}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.948209983995184}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 20.075127677991986}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 19.613351088017225}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 26.131227070000023}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 25.23966666101478}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 25.169638738036156}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.573690482182428}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.54654596094042}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 27.04506094707176}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 35.92588178999722}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 35.89134427602403}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 38.17887881118804}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 11.783776527037844}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 11.86458179494366}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.63978576194495}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.864284622017294}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.753634401829913}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 14.023887678980827}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 14.008215571055189}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.743516992079094}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results12.json b/analysis/files/results/results12.json deleted file mode 100644 index ad70f6bb..00000000 --- a/analysis/files/results/results12.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 14.008800629992038}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 19.633264974225312}, "opt_eval_metrics": {"exact_match": 80.86092715231788, "f1": 88.26868699204444}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 19.62449131719768}, "opt_eval_metrics": {"exact_match": 80.87038789025544, "f1": 88.24613086360249}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.459927490912378}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.49241598788649}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.51839367300272}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": {"stats": {"total": 93345986, "nnz": 42356011, "linear_total": 69402624, "linear_nnz": 18445824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1198080, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1379328, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1878528, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2090496, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2210304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1726464, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1747968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1826304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1443840, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1084416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1070592, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 789504, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 54.62471091151151, "linear_sparsity": 73.42200779036827}, "speed": {"eval_elapsed_time": 14.921356644947082}, "opt_eval_metrics": {"exact_match": 79.4228949858089, "f1": 87.22907143184382}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 15.946462976979092}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 15.954481962835416}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": {"stats": {"total": 93739586, "nnz": 40333447, "linear_total": 69795840, "linear_nnz": 16424448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 967680, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1085952, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1586688, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2013696, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1872384, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1416192, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1517568, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1645056, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1534464, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1056768, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1041408, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 686592, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 56.972876965767696, "linear_sparsity": 76.46786971830986}, "speed": {"eval_elapsed_time": 14.457574162865058}, "opt_eval_metrics": {"exact_match": 78.82686849574267, "f1": 86.75497848244157}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": {"stats": {"total": 97281986, "nnz": 45486623, "linear_total": 73334784, "linear_nnz": 21573120, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1477632, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1466880, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2388480, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2230272, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2671104, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2241024, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2088960, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1760256, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 1973760, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1271808, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1253376, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 749568, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 53.24250164876363, "linear_sparsity": 70.58269101876675}, "speed": {"eval_elapsed_time": 17.261011745082214}, "opt_eval_metrics": {"exact_match": 80.23651844843897, "f1": 87.68464122182475}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63685078, "linear_total": 84934656, "linear_nnz": 39741824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3854752, "linear_attention_total": 2359296, "linear_attention_nnz": 261808, "linear_dense_total": 4718592, "linear_dense_nnz": 3592944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4073232, "linear_attention_total": 2359296, "linear_attention_nnz": 407856, "linear_dense_total": 4718592, "linear_dense_nnz": 3665376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4218016, "linear_attention_total": 2359296, "linear_attention_nnz": 470352, "linear_dense_total": 4718592, "linear_dense_nnz": 3747664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4318192, "linear_attention_total": 2359296, "linear_attention_nnz": 586320, "linear_dense_total": 4718592, "linear_dense_nnz": 3731872}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4294272, "linear_attention_total": 2359296, "linear_attention_nnz": 598112, "linear_dense_total": 4718592, "linear_dense_nnz": 3696160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4191568, "linear_attention_total": 2359296, "linear_attention_nnz": 540976, "linear_dense_total": 4718592, "linear_dense_nnz": 3650592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4018960, "linear_attention_total": 2359296, "linear_attention_nnz": 518320, "linear_dense_total": 4718592, "linear_dense_nnz": 3500640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3692480, "linear_attention_total": 2359296, "linear_attention_nnz": 494608, "linear_dense_total": 4718592, "linear_dense_nnz": 3197872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3064736, "linear_attention_total": 2359296, "linear_attention_nnz": 381872, "linear_dense_total": 4718592, "linear_dense_nnz": 2682864}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1882688, "linear_attention_total": 2359296, "linear_attention_nnz": 281888, "linear_dense_total": 4718592, "linear_dense_nnz": 1600800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307728, "linear_attention_total": 2359296, "linear_attention_nnz": 212544, "linear_dense_total": 4718592, "linear_dense_nnz": 1095184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 825200, "linear_attention_total": 2359296, "linear_attention_nnz": 122704, "linear_dense_total": 4718592, "linear_dense_nnz": 702496}}, "total_sparsity": 41.51601184669167, "linear_sparsity": 53.208942177854944}, "speed": {"eval_elapsed_time": 33.01966134808026}, "opt_eval_metrics": {"exact_match": 80.52980132450331, "f1": 88.02284574429551}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63495382, "linear_total": 84934656, "linear_nnz": 39552208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3842976, "linear_attention_total": 2359296, "linear_attention_nnz": 258016, "linear_dense_total": 4718592, "linear_dense_nnz": 3584960}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4064144, "linear_attention_total": 2359296, "linear_attention_nnz": 404784, "linear_dense_total": 4718592, "linear_dense_nnz": 3659360}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4202080, "linear_attention_total": 2359296, "linear_attention_nnz": 460752, "linear_dense_total": 4718592, "linear_dense_nnz": 3741328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4301216, "linear_attention_total": 2359296, "linear_attention_nnz": 577184, "linear_dense_total": 4718592, "linear_dense_nnz": 3724032}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4277440, "linear_attention_total": 2359296, "linear_attention_nnz": 587792, "linear_dense_total": 4718592, "linear_dense_nnz": 3689648}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4172464, "linear_attention_total": 2359296, "linear_attention_nnz": 530480, "linear_dense_total": 4718592, "linear_dense_nnz": 3641984}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3999744, "linear_attention_total": 2359296, "linear_attention_nnz": 508336, "linear_dense_total": 4718592, "linear_dense_nnz": 3491408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3673360, "linear_attention_total": 2359296, "linear_attention_nnz": 486304, "linear_dense_total": 4718592, "linear_dense_nnz": 3187056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3043376, "linear_attention_total": 2359296, "linear_attention_nnz": 374032, "linear_dense_total": 4718592, "linear_dense_nnz": 2669344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1863968, "linear_attention_total": 2359296, "linear_attention_nnz": 276992, "linear_dense_total": 4718592, "linear_dense_nnz": 1586976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293056, "linear_attention_total": 2359296, "linear_attention_nnz": 209136, "linear_dense_total": 4718592, "linear_dense_nnz": 1083920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818384, "linear_attention_total": 2359296, "linear_attention_nnz": 120976, "linear_dense_total": 4718592, "linear_dense_nnz": 697408}}, "total_sparsity": 41.69021558428826, "linear_sparsity": 53.432191448447156}, "speed": {"eval_elapsed_time": 32.776620995020494}, "opt_eval_metrics": {"exact_match": 80.09460737937559, "f1": 87.80889686617203}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 54109530, "linear_total": 84934656, "linear_nnz": 30171936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3151120, "linear_attention_total": 2359296, "linear_attention_nnz": 172416, "linear_dense_total": 4718592, "linear_dense_nnz": 2978704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411360, "linear_attention_total": 2359296, "linear_attention_nnz": 308192, "linear_dense_total": 4718592, "linear_dense_nnz": 3103168}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3491136, "linear_attention_total": 2359296, "linear_attention_nnz": 285568, "linear_dense_total": 4718592, "linear_dense_nnz": 3205568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3603168, "linear_attention_total": 2359296, "linear_attention_nnz": 437904, "linear_dense_total": 4718592, "linear_dense_nnz": 3165264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3410880, "linear_attention_total": 2359296, "linear_attention_nnz": 321040, "linear_dense_total": 4718592, "linear_dense_nnz": 3089840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3356416, "linear_attention_total": 2359296, "linear_attention_nnz": 332784, "linear_dense_total": 4718592, "linear_dense_nnz": 3023632}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3072896, "linear_attention_total": 2359296, "linear_attention_nnz": 288464, "linear_dense_total": 4718592, "linear_dense_nnz": 2784432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2683232, "linear_attention_total": 2359296, "linear_attention_nnz": 328464, "linear_dense_total": 4718592, "linear_dense_nnz": 2354768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1957200, "linear_attention_total": 2359296, "linear_attention_nnz": 204832, "linear_dense_total": 4718592, "linear_dense_nnz": 1752368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 905552, "linear_attention_total": 2359296, "linear_attention_nnz": 189616, "linear_dense_total": 4718592, "linear_dense_nnz": 715936}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667120, "linear_attention_total": 2359296, "linear_attention_nnz": 140384, "linear_dense_total": 4718592, "linear_dense_nnz": 526736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461856, "linear_attention_total": 2359296, "linear_attention_nnz": 84608, "linear_dense_total": 4718592, "linear_dense_nnz": 377248}}, "total_sparsity": 50.30953543778212, "linear_sparsity": 64.47629575376158}, "speed": {"eval_elapsed_time": 29.022300366079435}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.31499809166372}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53891686, "linear_total": 84934656, "linear_nnz": 29954112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3130496, "linear_attention_total": 2359296, "linear_attention_nnz": 169136, "linear_dense_total": 4718592, "linear_dense_nnz": 2961360}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3393488, "linear_attention_total": 2359296, "linear_attention_nnz": 304464, "linear_dense_total": 4718592, "linear_dense_nnz": 3089024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3470880, "linear_attention_total": 2359296, "linear_attention_nnz": 279216, "linear_dense_total": 4718592, "linear_dense_nnz": 3191664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3580464, "linear_attention_total": 2359296, "linear_attention_nnz": 429728, "linear_dense_total": 4718592, "linear_dense_nnz": 3150736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3390736, "linear_attention_total": 2359296, "linear_attention_nnz": 314688, "linear_dense_total": 4718592, "linear_dense_nnz": 3076048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3334432, "linear_attention_total": 2359296, "linear_attention_nnz": 326416, "linear_dense_total": 4718592, "linear_dense_nnz": 3008016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3048464, "linear_attention_total": 2359296, "linear_attention_nnz": 281984, "linear_dense_total": 4718592, "linear_dense_nnz": 2766480}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2658992, "linear_attention_total": 2359296, "linear_attention_nnz": 320352, "linear_dense_total": 4718592, "linear_dense_nnz": 2338640}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936656, "linear_attention_total": 2359296, "linear_attention_nnz": 200608, "linear_dense_total": 4718592, "linear_dense_nnz": 1736048}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 892160, "linear_attention_total": 2359296, "linear_attention_nnz": 185008, "linear_dense_total": 4718592, "linear_dense_nnz": 707152}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660320, "linear_attention_total": 2359296, "linear_attention_nnz": 137920, "linear_dense_total": 4718592, "linear_dense_nnz": 522400}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457024, "linear_attention_total": 2359296, "linear_attention_nnz": 82480, "linear_dense_total": 4718592, "linear_dense_nnz": 374544}}, "total_sparsity": 50.50958835936713, "linear_sparsity": 64.7327564380787}, "speed": {"eval_elapsed_time": 29.01672533689998}, "opt_eval_metrics": {"exact_match": 79.06338694418164, "f1": 86.86293366416082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 23.56436571292579}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 23.61654355400242}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 48926434, "linear_total": 84934656, "linear_nnz": 25008128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2018816, "linear_attention_total": 2359296, "linear_attention_nnz": 733184, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2776064, "linear_attention_total": 2359296, "linear_attention_nnz": 1252352, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993664, "linear_attention_total": 2359296, "linear_attention_nnz": 1437696, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3162624, "linear_attention_total": 2359296, "linear_attention_nnz": 1545216, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3089408, "linear_attention_total": 2359296, "linear_attention_nnz": 1574912, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2505216, "linear_attention_total": 2359296, "linear_attention_nnz": 1370112, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026496, "linear_attention_total": 2359296, "linear_attention_nnz": 1178624, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665536, "linear_attention_total": 2359296, "linear_attention_nnz": 1190912, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 957440, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 805888, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904192, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 55.069333723048565, "linear_sparsity": 70.55603780864197}, "speed": {"eval_elapsed_time": 21.182856186991557}, "opt_eval_metrics": {"exact_match": 80.79470198675497, "f1": 88.10958975740277}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 48725622, "linear_total": 84934656, "linear_nnz": 24807424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098688, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2731008, "linear_attention_total": 2359296, "linear_attention_nnz": 1225728, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2977280, "linear_attention_total": 2359296, "linear_attention_nnz": 1433600, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3176448, "linear_attention_total": 2359296, "linear_attention_nnz": 1566720, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3081216, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2487808, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 1166336, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1617408, "linear_attention_total": 2359296, "linear_attention_nnz": 1148928, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 945664, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 787456, "linear_attention_total": 2359296, "linear_attention_nnz": 624640, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897536, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.25374562922606, "linear_sparsity": 70.79234182098766}, "speed": {"eval_elapsed_time": 21.17357637709938}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 88.07285498416482}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 48790134, "linear_total": 84934656, "linear_nnz": 24871936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2086400, "linear_attention_total": 2359296, "linear_attention_nnz": 978944, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1995264, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2738176, "linear_attention_total": 2359296, "linear_attention_nnz": 1232896, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2998784, "linear_attention_total": 2359296, "linear_attention_nnz": 1455104, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3208192, "linear_attention_total": 2359296, "linear_attention_nnz": 1598464, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3104768, "linear_attention_total": 2359296, "linear_attention_nnz": 1596416, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 1373184, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2002432, "linear_attention_total": 2359296, "linear_attention_nnz": 1165312, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1631744, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 615424, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 881152, "linear_attention_total": 2359296, "linear_attention_nnz": 403456, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.19450225287742, "linear_sparsity": 70.71638695987654}, "speed": {"eval_elapsed_time": 21.167539164889604}, "opt_eval_metrics": {"exact_match": 80.70009460737937, "f1": 88.04831949879843}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72930262, "linear_total": 84934656, "linear_nnz": 48982384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408688, "linear_attention_total": 2359296, "linear_attention_nnz": 428592, "linear_dense_total": 4718592, "linear_dense_nnz": 3980096}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4561328, "linear_attention_total": 2359296, "linear_attention_nnz": 545744, "linear_dense_total": 4718592, "linear_dense_nnz": 4015584}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4791104, "linear_attention_total": 2359296, "linear_attention_nnz": 729664, "linear_dense_total": 4718592, "linear_dense_nnz": 4061440}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4914112, "linear_attention_total": 2359296, "linear_attention_nnz": 851472, "linear_dense_total": 4718592, "linear_dense_nnz": 4062640}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5008736, "linear_attention_total": 2359296, "linear_attention_nnz": 960992, "linear_dense_total": 4718592, "linear_dense_nnz": 4047744}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4908864, "linear_attention_total": 2359296, "linear_attention_nnz": 902768, "linear_dense_total": 4718592, "linear_dense_nnz": 4006096}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4781792, "linear_attention_total": 2359296, "linear_attention_nnz": 861120, "linear_dense_total": 4718592, "linear_dense_nnz": 3920672}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4492512, "linear_attention_total": 2359296, "linear_attention_nnz": 759664, "linear_dense_total": 4718592, "linear_dense_nnz": 3732848}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4061488, "linear_attention_total": 2359296, "linear_attention_nnz": 670096, "linear_dense_total": 4718592, "linear_dense_nnz": 3391392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3105840, "linear_attention_total": 2359296, "linear_attention_nnz": 444064, "linear_dense_total": 4718592, "linear_dense_nnz": 2661776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422000, "linear_attention_total": 2359296, "linear_attention_nnz": 329968, "linear_dense_total": 4718592, "linear_dense_nnz": 2092032}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525920, "linear_attention_total": 2359296, "linear_attention_nnz": 190816, "linear_dense_total": 4718592, "linear_dense_nnz": 1335104}}, "total_sparsity": 33.025871793300276, "linear_sparsity": 42.329331386236504}, "speed": {"eval_elapsed_time": 35.11626772303134}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.58172107792693}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53211146, "linear_total": 84934656, "linear_nnz": 29278080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993984, "linear_attention_total": 2359296, "linear_attention_nnz": 241280, "linear_dense_total": 4718592, "linear_dense_nnz": 2752704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3330688, "linear_attention_total": 2359296, "linear_attention_nnz": 379584, "linear_dense_total": 4718592, "linear_dense_nnz": 2951104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3517120, "linear_attention_total": 2359296, "linear_attention_nnz": 322880, "linear_dense_total": 4718592, "linear_dense_nnz": 3194240}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720576, "linear_attention_total": 2359296, "linear_attention_nnz": 565440, "linear_dense_total": 4718592, "linear_dense_nnz": 3155136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3455168, "linear_attention_total": 2359296, "linear_attention_nnz": 390400, "linear_dense_total": 4718592, "linear_dense_nnz": 3064768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3400192, "linear_attention_total": 2359296, "linear_attention_nnz": 406592, "linear_dense_total": 4718592, "linear_dense_nnz": 2993600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2988160, "linear_attention_total": 2359296, "linear_attention_nnz": 356480, "linear_dense_total": 4718592, "linear_dense_nnz": 2631680}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2477696, "linear_attention_total": 2359296, "linear_attention_nnz": 409920, "linear_dense_total": 4718592, "linear_dense_nnz": 2067776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1612416, "linear_attention_total": 2359296, "linear_attention_nnz": 242048, "linear_dense_total": 4718592, "linear_dense_nnz": 1370368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697664, "linear_attention_total": 2359296, "linear_attention_nnz": 224896, "linear_dense_total": 4718592, "linear_dense_nnz": 472768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591360, "linear_attention_total": 2359296, "linear_attention_nnz": 172352, "linear_dense_total": 4718592, "linear_dense_nnz": 419008}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 493056, "linear_attention_total": 2359296, "linear_attention_nnz": 104768, "linear_dense_total": 4718592, "linear_dense_nnz": 388288}}, "total_sparsity": 51.13454941064908, "linear_sparsity": 65.52870008680556}, "speed": {"eval_elapsed_time": 26.50232954812236}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.75922108224064}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 29.429046569159254}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 28.692298884037882}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 28.704244010150433}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 33.08102096617222}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 33.090760480146855}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63788226, "linear_total": 84934656, "linear_nnz": 39853312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3867392, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 3448576}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250112, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 3696384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519424, "linear_attention_total": 2359296, "linear_attention_nnz": 562432, "linear_dense_total": 4718592, "linear_dense_nnz": 3956992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4809728, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 3982336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4702976, "linear_attention_total": 2359296, "linear_attention_nnz": 790016, "linear_dense_total": 4718592, "linear_dense_nnz": 3912960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4601344, "linear_attention_total": 2359296, "linear_attention_nnz": 701696, "linear_dense_total": 4718592, "linear_dense_nnz": 3899648}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 667392, "linear_dense_total": 4718592, "linear_dense_nnz": 3576064}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3669248, "linear_attention_total": 2359296, "linear_attention_nnz": 700416, "linear_dense_total": 4718592, "linear_dense_nnz": 2968832}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2404096, "linear_attention_total": 2359296, "linear_attention_nnz": 437504, "linear_dense_total": 4718592, "linear_dense_nnz": 1966592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1054976, "linear_attention_total": 2359296, "linear_attention_nnz": 361472, "linear_dense_total": 4718592, "linear_dense_nnz": 693504}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946176, "linear_attention_total": 2359296, "linear_attention_nnz": 291584, "linear_dense_total": 4718592, "linear_dense_nnz": 654592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 168960, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 41.42128782970864, "linear_sparsity": 53.077678915895056}, "speed": {"eval_elapsed_time": 26.95584986009635}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.51569063636161}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 63672482, "linear_total": 84934656, "linear_nnz": 39737600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3861248, "linear_attention_total": 2359296, "linear_attention_nnz": 416256, "linear_dense_total": 4718592, "linear_dense_nnz": 3444992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250368, "linear_attention_total": 2359296, "linear_attention_nnz": 541952, "linear_dense_total": 4718592, "linear_dense_nnz": 3708416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4532736, "linear_attention_total": 2359296, "linear_attention_nnz": 555520, "linear_dense_total": 4718592, "linear_dense_nnz": 3977216}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822784, "linear_attention_total": 2359296, "linear_attention_nnz": 802816, "linear_dense_total": 4718592, "linear_dense_nnz": 4019968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4714240, "linear_attention_total": 2359296, "linear_attention_nnz": 774400, "linear_dense_total": 4718592, "linear_dense_nnz": 3939840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4584192, "linear_attention_total": 2359296, "linear_attention_nnz": 686592, "linear_dense_total": 4718592, "linear_dense_nnz": 3897600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4225024, "linear_attention_total": 2359296, "linear_attention_nnz": 656384, "linear_dense_total": 4718592, "linear_dense_nnz": 3568640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3634176, "linear_attention_total": 2359296, "linear_attention_nnz": 676864, "linear_dense_total": 4718592, "linear_dense_nnz": 2957312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 432640, "linear_dense_total": 4718592, "linear_dense_nnz": 1931264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030400, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933120, "linear_attention_total": 2359296, "linear_attention_nnz": 285184, "linear_dense_total": 4718592, "linear_dense_nnz": 647936}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785408, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 611328}}, "total_sparsity": 41.52757914531035, "linear_sparsity": 53.213915412808646}, "speed": {"eval_elapsed_time": 26.93265108484775}, "opt_eval_metrics": {"exact_match": 79.55534531693472, "f1": 87.439750439335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63651698, "linear_total": 84934656, "linear_nnz": 39716864, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3864832, "linear_attention_total": 2359296, "linear_attention_nnz": 417024, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4246016, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 3703296}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4537600, "linear_attention_total": 2359296, "linear_attention_nnz": 555776, "linear_dense_total": 4718592, "linear_dense_nnz": 3981824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4824576, "linear_attention_total": 2359296, "linear_attention_nnz": 810240, "linear_dense_total": 4718592, "linear_dense_nnz": 4014336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4704768, "linear_attention_total": 2359296, "linear_attention_nnz": 764160, "linear_dense_total": 4718592, "linear_dense_nnz": 3940608}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4590080, "linear_attention_total": 2359296, "linear_attention_nnz": 685824, "linear_dense_total": 4718592, "linear_dense_nnz": 3904256}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219136, "linear_attention_total": 2359296, "linear_attention_nnz": 647680, "linear_dense_total": 4718592, "linear_dense_nnz": 3571456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640576, "linear_attention_total": 2359296, "linear_attention_nnz": 684288, "linear_dense_total": 4718592, "linear_dense_nnz": 2956288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2360064, "linear_attention_total": 2359296, "linear_attention_nnz": 427264, "linear_dense_total": 4718592, "linear_dense_nnz": 1932800}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033472, "linear_attention_total": 2359296, "linear_attention_nnz": 350976, "linear_dense_total": 4718592, "linear_dense_nnz": 682496}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 920832, "linear_attention_total": 2359296, "linear_attention_nnz": 273408, "linear_dense_total": 4718592, "linear_dense_nnz": 647424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 774912, "linear_attention_total": 2359296, "linear_attention_nnz": 166400, "linear_dense_total": 4718592, "linear_dense_nnz": 608512}}, "total_sparsity": 41.546665739029805, "linear_sparsity": 53.238329475308646}, "speed": {"eval_elapsed_time": 26.93877486907877}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.29496050765553}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 38271273, "linear_total": 84934656, "linear_nnz": 14360064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 991744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 965120, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 216576}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487360, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 1389568, "linear_dense_total": 4718592, "linear_dense_nnz": 377856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 1449984, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1713664, "linear_attention_total": 2359296, "linear_attention_nnz": 1349632, "linear_dense_total": 4718592, "linear_dense_nnz": 364032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1481216, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1190400, "linear_attention_total": 2359296, "linear_attention_nnz": 964608, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191424, "linear_attention_total": 2359296, "linear_attention_nnz": 1063936, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708608, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556544, "linear_attention_total": 2359296, "linear_attention_nnz": 502784, "linear_dense_total": 4718592, "linear_dense_nnz": 53760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 441856, "linear_attention_total": 2359296, "linear_attention_nnz": 360448, "linear_dense_total": 4718592, "linear_dense_nnz": 81408}}, "total_sparsity": 64.85429951512302, "linear_sparsity": 83.0928096064815}, "speed": {"eval_elapsed_time": 17.621023153187707}, "opt_eval_metrics": {"exact_match": 78.67549668874172, "f1": 86.51098653495667}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 30.383016001898795}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 30.506126267835498}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 28.065979121020064}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 28.04132828908041}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 28.038834661012515}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 29.592536952113733}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 29.660654196050018}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 29.750202575000003}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36724619, "linear_total": 84934656, "linear_nnz": 12816896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933888, "linear_attention_total": 2359296, "linear_attention_nnz": 522240, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1116160, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1374720, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692160, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1659392, "linear_attention_total": 2359296, "linear_attention_nnz": 825344, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1416192, "linear_attention_total": 2359296, "linear_attention_nnz": 672768, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1207296, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1235456, "linear_attention_total": 2359296, "linear_attention_nnz": 785408, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 514048, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455168, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 421888, "linear_attention_total": 2359296, "linear_attention_nnz": 222208, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.2746399944621, "linear_sparsity": 84.9096981095679}, "speed": {"eval_elapsed_time": 15.043476368067786}, "opt_eval_metrics": {"exact_match": 78.3349101229896, "f1": 86.4116267700138}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 36711275, "linear_total": 84934656, "linear_nnz": 12803584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 930816, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 536576, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1366528, "linear_attention_total": 2359296, "linear_attention_nnz": 667648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1681920, "linear_attention_total": 2359296, "linear_attention_nnz": 967680, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1412096, "linear_attention_total": 2359296, "linear_attention_nnz": 668672, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1221632, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1237504, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757760, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443904, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 425984, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.28689420474849, "linear_sparsity": 84.92537133487654}, "speed": {"eval_elapsed_time": 15.047897994983941}, "opt_eval_metrics": {"exact_match": 78.37275307473983, "f1": 86.39441106336629}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72403618, "linear_total": 84934656, "linear_nnz": 48458624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451008, "linear_attention_total": 2359296, "linear_attention_nnz": 446336, "linear_dense_total": 4718592, "linear_dense_nnz": 4004672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674880, "linear_attention_total": 2359296, "linear_attention_nnz": 597248, "linear_dense_total": 4718592, "linear_dense_nnz": 4077632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4954368, "linear_attention_total": 2359296, "linear_attention_nnz": 799296, "linear_dense_total": 4718592, "linear_dense_nnz": 4155072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5115648, "linear_attention_total": 2359296, "linear_attention_nnz": 950208, "linear_dense_total": 4718592, "linear_dense_nnz": 4165440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5174848, "linear_attention_total": 2359296, "linear_attention_nnz": 1022400, "linear_dense_total": 4718592, "linear_dense_nnz": 4152448}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5020992, "linear_attention_total": 2359296, "linear_attention_nnz": 914368, "linear_dense_total": 4718592, "linear_dense_nnz": 4106624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4879296, "linear_attention_total": 2359296, "linear_attention_nnz": 918208, "linear_dense_total": 4718592, "linear_dense_nnz": 3961088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4517696, "linear_attention_total": 2359296, "linear_attention_nnz": 832704, "linear_dense_total": 4718592, "linear_dense_nnz": 3684992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3922688, "linear_attention_total": 2359296, "linear_attention_nnz": 715648, "linear_dense_total": 4718592, "linear_dense_nnz": 3207040}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584512, "linear_attention_total": 2359296, "linear_attention_nnz": 467072, "linear_dense_total": 4718592, "linear_dense_nnz": 2117440}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879872, "linear_attention_total": 2359296, "linear_attention_nnz": 362688, "linear_dense_total": 4718592, "linear_dense_nnz": 1517184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282816, "linear_attention_total": 2359296, "linear_attention_nnz": 218432, "linear_dense_total": 4718592, "linear_dense_nnz": 1064384}}, "total_sparsity": 33.50950536060172, "linear_sparsity": 42.94599368248457}, "speed": {"eval_elapsed_time": 32.87611435819417}, "opt_eval_metrics": {"exact_match": 81.10690633869442, "f1": 88.3744311515211}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72395170, "linear_total": 84934656, "linear_nnz": 48450176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4450944, "linear_attention_total": 2359296, "linear_attention_nnz": 446080, "linear_dense_total": 4718592, "linear_dense_nnz": 4004864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674240, "linear_attention_total": 2359296, "linear_attention_nnz": 597312, "linear_dense_total": 4718592, "linear_dense_nnz": 4076928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4955648, "linear_attention_total": 2359296, "linear_attention_nnz": 800192, "linear_dense_total": 4718592, "linear_dense_nnz": 4155456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5114624, "linear_attention_total": 2359296, "linear_attention_nnz": 948864, "linear_dense_total": 4718592, "linear_dense_nnz": 4165760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5171840, "linear_attention_total": 2359296, "linear_attention_nnz": 1019200, "linear_dense_total": 4718592, "linear_dense_nnz": 4152640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5023808, "linear_attention_total": 2359296, "linear_attention_nnz": 915392, "linear_dense_total": 4718592, "linear_dense_nnz": 4108416}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4876544, "linear_attention_total": 2359296, "linear_attention_nnz": 916160, "linear_dense_total": 4718592, "linear_dense_nnz": 3960384}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519232, "linear_attention_total": 2359296, "linear_attention_nnz": 834176, "linear_dense_total": 4718592, "linear_dense_nnz": 3685056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3921792, "linear_attention_total": 2359296, "linear_attention_nnz": 713856, "linear_dense_total": 4718592, "linear_dense_nnz": 3207936}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2581056, "linear_attention_total": 2359296, "linear_attention_nnz": 465600, "linear_dense_total": 4718592, "linear_dense_nnz": 2115456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879424, "linear_attention_total": 2359296, "linear_attention_nnz": 362048, "linear_dense_total": 4718592, "linear_dense_nnz": 1517376}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 217216, "linear_dense_total": 4718592, "linear_dense_nnz": 1063808}}, "total_sparsity": 33.51726342179023, "linear_sparsity": 42.95594015239198}, "speed": {"eval_elapsed_time": 32.87603668309748}, "opt_eval_metrics": {"exact_match": 80.82308420056765, "f1": 88.21300800880684}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 44702229, "linear_total": 84934656, "linear_nnz": 20786688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1582592, "linear_attention_total": 2359296, "linear_attention_nnz": 1055744, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190848, "linear_attention_total": 2359296, "linear_attention_nnz": 1316864, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2420736, "linear_attention_total": 2359296, "linear_attention_nnz": 1468416, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2697728, "linear_attention_total": 2359296, "linear_attention_nnz": 1651712, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2603008, "linear_attention_total": 2359296, "linear_attention_nnz": 1616896, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102272, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 1265664, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 1212416, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863232, "linear_attention_total": 2359296, "linear_attention_nnz": 749568, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 652288, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682496, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 58.94855257518133, "linear_sparsity": 75.52625868055556}, "speed": {"eval_elapsed_time": 19.962007428053766}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 88.06903108265608}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72878482, "linear_total": 84934656, "linear_nnz": 48937216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4586496, "linear_attention_total": 2359296, "linear_attention_nnz": 517888, "linear_dense_total": 4718592, "linear_dense_nnz": 4068608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4844288, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 4202752}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5155328, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 4313856}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5409024, "linear_attention_total": 2359296, "linear_attention_nnz": 1072896, "linear_dense_total": 4718592, "linear_dense_nnz": 4336128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5385984, "linear_attention_total": 2359296, "linear_attention_nnz": 1068800, "linear_dense_total": 4718592, "linear_dense_nnz": 4317184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5272832, "linear_attention_total": 2359296, "linear_attention_nnz": 961792, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5128448, "linear_attention_total": 2359296, "linear_attention_nnz": 986880, "linear_dense_total": 4718592, "linear_dense_nnz": 4141568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4725504, "linear_attention_total": 2359296, "linear_attention_nnz": 905472, "linear_dense_total": 4718592, "linear_dense_nnz": 3820032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3841792, "linear_attention_total": 2359296, "linear_attention_nnz": 756224, "linear_dense_total": 4718592, "linear_dense_nnz": 3085568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879808, "linear_attention_total": 2359296, "linear_attention_nnz": 463360, "linear_dense_total": 4718592, "linear_dense_nnz": 1416448}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 415488, "linear_dense_total": 4718592, "linear_dense_nnz": 1090304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1201920, "linear_attention_total": 2359296, "linear_attention_nnz": 254720, "linear_dense_total": 4718592, "linear_dense_nnz": 947200}}, "total_sparsity": 33.07342297799975, "linear_sparsity": 42.38251109182099}, "speed": {"eval_elapsed_time": 30.725059562828392}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.34112193061533}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34772839, "linear_total": 84934656, "linear_nnz": 10866176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732160, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835072, "linear_attention_total": 2359296, "linear_attention_nnz": 535552, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1128960, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1551872, "linear_attention_total": 2359296, "linear_attention_nnz": 1111040, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1389056, "linear_attention_total": 2359296, "linear_attention_nnz": 892928, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1096704, "linear_attention_total": 2359296, "linear_attention_nnz": 663552, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1000448, "linear_attention_total": 2359296, "linear_attention_nnz": 662528, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 801792, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 803328, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 498688, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 422912, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 336384, "linear_attention_total": 2359296, "linear_attention_nnz": 239616, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 68.06702028169144, "linear_sparsity": 87.20642843364197}, "speed": {"eval_elapsed_time": 14.562878740951419}, "opt_eval_metrics": {"exact_match": 77.8240302743614, "f1": 86.11992485005756}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53223538, "linear_total": 84934656, "linear_nnz": 29295872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2685696, "linear_attention_total": 2359296, "linear_attention_nnz": 331008, "linear_dense_total": 4718592, "linear_dense_nnz": 2354688}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3258624, "linear_attention_total": 2359296, "linear_attention_nnz": 432384, "linear_dense_total": 4718592, "linear_dense_nnz": 2826240}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3726080, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 3302144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3917568, "linear_attention_total": 2359296, "linear_attention_nnz": 669440, "linear_dense_total": 4718592, "linear_dense_nnz": 3248128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3647232, "linear_attention_total": 2359296, "linear_attention_nnz": 453632, "linear_dense_total": 4718592, "linear_dense_nnz": 3193600}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3593472, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 3119616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2939648, "linear_attention_total": 2359296, "linear_attention_nnz": 445952, "linear_dense_total": 4718592, "linear_dense_nnz": 2493696}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2381824, "linear_attention_total": 2359296, "linear_attention_nnz": 490752, "linear_dense_total": 4718592, "linear_dense_nnz": 1891072}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384448, "linear_attention_total": 2359296, "linear_attention_nnz": 275712, "linear_dense_total": 4718592, "linear_dense_nnz": 1108736}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 348928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 618752, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 415744}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 535296, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 423168}}, "total_sparsity": 51.12316945157615, "linear_sparsity": 65.5077522183642}, "speed": {"eval_elapsed_time": 23.845138414064422}, "opt_eval_metrics": {"exact_match": 78.11731315042573, "f1": 86.14927876930865}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 35.13715321500786}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34069864, "linear_total": 84934656, "linear_nnz": 10163200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 674816, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1137664, "linear_attention_total": 2359296, "linear_attention_nnz": 937984, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1458176, "linear_attention_total": 2359296, "linear_attention_nnz": 1193984, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1335808, "linear_attention_total": 2359296, "linear_attention_nnz": 1057792, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 843264, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 948736, "linear_attention_total": 2359296, "linear_attention_nnz": 759808, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 979456, "linear_attention_total": 2359296, "linear_attention_nnz": 830464, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833536, "linear_attention_total": 2359296, "linear_attention_nnz": 753664, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 478208, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 46080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 432128, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 36864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290816, "linear_attention_total": 2359296, "linear_attention_nnz": 238592, "linear_dense_total": 4718592, "linear_dense_nnz": 52224}}, "total_sparsity": 68.71258409134985, "linear_sparsity": 88.03409529320987}, "speed": {"eval_elapsed_time": 15.140548604074866}, "opt_eval_metrics": {"exact_match": 76.9914853358562, "f1": 85.26341062121247}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 62877338, "linear_total": 84934656, "linear_nnz": 38938240, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3827456, "linear_attention_total": 2359296, "linear_attention_nnz": 326336, "linear_dense_total": 4718592, "linear_dense_nnz": 3501120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4141120, "linear_attention_total": 2359296, "linear_attention_nnz": 487552, "linear_dense_total": 4718592, "linear_dense_nnz": 3653568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4289088, "linear_attention_total": 2359296, "linear_attention_nnz": 487616, "linear_dense_total": 4718592, "linear_dense_nnz": 3801472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4512896, "linear_attention_total": 2359296, "linear_attention_nnz": 712832, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4390144, "linear_attention_total": 2359296, "linear_attention_nnz": 646272, "linear_dense_total": 4718592, "linear_dense_nnz": 3743872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4316928, "linear_attention_total": 2359296, "linear_attention_nnz": 625600, "linear_dense_total": 4718592, "linear_dense_nnz": 3691328}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4036864, "linear_attention_total": 2359296, "linear_attention_nnz": 575808, "linear_dense_total": 4718592, "linear_dense_nnz": 3461056}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592320, "linear_attention_total": 2359296, "linear_attention_nnz": 579392, "linear_dense_total": 4718592, "linear_dense_nnz": 3012928}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2753408, "linear_attention_total": 2359296, "linear_attention_nnz": 405632, "linear_dense_total": 4718592, "linear_dense_nnz": 2347776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1318784, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 1001344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 994816, "linear_attention_total": 2359296, "linear_attention_nnz": 238208, "linear_dense_total": 4718592, "linear_dense_nnz": 756608}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764416, "linear_attention_total": 2359296, "linear_attention_nnz": 141568, "linear_dense_total": 4718592, "linear_dense_nnz": 622848}}, "total_sparsity": 42.257784614732465, "linear_sparsity": 54.1550624517747}, "speed": {"eval_elapsed_time": 29.41211991594173}, "opt_eval_metrics": {"exact_match": 80.05676442762535, "f1": 87.66615713942541}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42128141, "linear_total": 84934656, "linear_nnz": 18215424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277440, "linear_attention_total": 2359296, "linear_attention_nnz": 643072, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539584, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068480, "linear_attention_total": 2359296, "linear_attention_nnz": 1051648, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2334208, "linear_attention_total": 2359296, "linear_attention_nnz": 1257472, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2473984, "linear_attention_total": 2359296, "linear_attention_nnz": 1315840, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2078208, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1820160, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1555456, "linear_attention_total": 2359296, "linear_attention_nnz": 925696, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 663040, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576000, "linear_attention_total": 2359296, "linear_attention_nnz": 463872, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 61.31241765669342, "linear_sparsity": 78.55360243055556}, "speed": {"eval_elapsed_time": 17.672173040919006}, "opt_eval_metrics": {"exact_match": 79.66887417218543, "f1": 87.3881230572442}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39533983, "linear_total": 84934656, "linear_nnz": 15622656, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107968, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 720896, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717760, "linear_attention_total": 2359296, "linear_attention_nnz": 1098752, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967104, "linear_attention_total": 2359296, "linear_attention_nnz": 1309696, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2067968, "linear_attention_total": 2359296, "linear_attention_nnz": 1362944, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742336, "linear_attention_total": 2359296, "linear_attention_nnz": 1074176, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565696, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1342464, "linear_attention_total": 2359296, "linear_attention_nnz": 958464, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1153536, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729088, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 551936, "linear_attention_total": 2359296, "linear_attention_nnz": 478208, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 63.694713643514845, "linear_sparsity": 81.6062644675926}, "speed": {"eval_elapsed_time": 17.396596929989755}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.14755939306319}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42626625, "linear_total": 84934656, "linear_nnz": 18712064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1420800, "linear_attention_total": 2359296, "linear_attention_nnz": 1210368, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1381888, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013184, "linear_attention_total": 2359296, "linear_attention_nnz": 1500160, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1526784, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395136, "linear_attention_total": 2359296, "linear_attention_nnz": 1734656, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211328, "linear_attention_total": 2359296, "linear_attention_nnz": 1659904, "linear_dense_total": 4718592, "linear_dense_nnz": 551424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943040, "linear_attention_total": 2359296, "linear_attention_nnz": 1486848, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590784, "linear_attention_total": 2359296, "linear_attention_nnz": 1254400, "linear_dense_total": 4718592, "linear_dense_nnz": 336384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1441280, "linear_attention_total": 2359296, "linear_attention_nnz": 1267712, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 837632, "linear_attention_total": 2359296, "linear_attention_nnz": 760832, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 712704, "linear_dense_total": 4718592, "linear_dense_nnz": 69120}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 580096, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 136704}}, "total_sparsity": 60.85464429335368, "linear_sparsity": 77.96887056327161}, "speed": {"eval_elapsed_time": 19.82656983099878}, "opt_eval_metrics": {"exact_match": 80.10406811731315, "f1": 87.56487698206614}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 27.474724027095363}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.814206156879663}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 32.3695623409003}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 17.91969774197787}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.6927186998073}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.150802633957937}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.863338297931477}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.57372920983471}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.812317132018507}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.71152348187752}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.398823922965676}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.785655258921906}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 28.275199214927852}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.337535696104169}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 33.620146826142445}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 33.69571442203596}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 30.31329287402332}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 27.85703947697766}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 27.81183459307067}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 27.788447924889624}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 17.21582882408984}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 17.197634894168004}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.265181887894869}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.085542490938678}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.512992850970477}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 18.717300809919834}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.605645736912265}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 41.11115196393803}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 40.64612381509505}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 25.710868231020868}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 25.134117686888203}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 24.50548317306675}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 21.893441491993144}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 21.611296633956954}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 21.071897589135915}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 16.06849605194293}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.895570316817611}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 40.132621727185324}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.20940860803239}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.07671318203211}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.641912897117436}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.640617809956893}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 25.115268317982554}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 81807426, "linear_total": 84934656, "linear_nnz": 57862144, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5237760, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 4316160}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5140480, "linear_attention_total": 2359296, "linear_attention_nnz": 829440, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5641216, "linear_attention_total": 2359296, "linear_attention_nnz": 1221632, "linear_dense_total": 4718592, "linear_dense_nnz": 4419584}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5816320, "linear_attention_total": 2359296, "linear_attention_nnz": 1386496, "linear_dense_total": 4718592, "linear_dense_nnz": 4429824}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5997568, "linear_attention_total": 2359296, "linear_attention_nnz": 1540096, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5968896, "linear_attention_total": 2359296, "linear_attention_nnz": 1548288, "linear_dense_total": 4718592, "linear_dense_nnz": 4420608}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5685248, "linear_attention_total": 2359296, "linear_attention_nnz": 1364992, "linear_dense_total": 4718592, "linear_dense_nnz": 4320256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5458944, "linear_attention_total": 2359296, "linear_attention_nnz": 1272832, "linear_dense_total": 4718592, "linear_dense_nnz": 4186112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 1173504, "linear_dense_total": 4718592, "linear_dense_nnz": 3787776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3566592, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 2839552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 671744, "linear_dense_total": 4718592, "linear_dense_nnz": 2001920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 409600, "linear_dense_total": 4718592, "linear_dense_nnz": 1304576}}, "total_sparsity": 24.873695953757846, "linear_sparsity": 31.87451774691358}, "speed": {"eval_elapsed_time": 32.08257991797291}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.73698799207777}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 81295202, "linear_total": 84934656, "linear_nnz": 57351168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5262336, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5235712, "linear_attention_total": 2359296, "linear_attention_nnz": 771072, "linear_dense_total": 4718592, "linear_dense_nnz": 4464640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5669888, "linear_attention_total": 2359296, "linear_attention_nnz": 1152000, "linear_dense_total": 4718592, "linear_dense_nnz": 4517888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5875712, "linear_attention_total": 2359296, "linear_attention_nnz": 1312768, "linear_dense_total": 4718592, "linear_dense_nnz": 4562944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6056960, "linear_attention_total": 2359296, "linear_attention_nnz": 1501184, "linear_dense_total": 4718592, "linear_dense_nnz": 4555776}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5898240, "linear_attention_total": 2359296, "linear_attention_nnz": 1377280, "linear_dense_total": 4718592, "linear_dense_nnz": 4520960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5831680, "linear_attention_total": 2359296, "linear_attention_nnz": 1357824, "linear_dense_total": 4718592, "linear_dense_nnz": 4473856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5483520, "linear_attention_total": 2359296, "linear_attention_nnz": 1192960, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4958208, "linear_attention_total": 2359296, "linear_attention_nnz": 1069056, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3236864, "linear_attention_total": 2359296, "linear_attention_nnz": 718848, "linear_dense_total": 4718592, "linear_dense_nnz": 2518016}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2222080, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1607680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1619968, "linear_attention_total": 2359296, "linear_attention_nnz": 389120, "linear_dense_total": 4718592, "linear_dense_nnz": 1230848}}, "total_sparsity": 25.344087186502197, "linear_sparsity": 32.47612847222222}, "speed": {"eval_elapsed_time": 31.65403198893182}, "opt_eval_metrics": {"exact_match": 81.51371807000946, "f1": 88.67903677006836}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.754389239940792}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 12.991677745943889}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.87139375694096}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.044030340854079}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.875461397925392}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.863983491901308}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.856388033833355}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.372085305862129}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.856825530063361}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.743963541928679}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.430293028010055}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 25.971711199032143}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.440584641881287}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 13.018812068970874}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.96851964481175}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.973318020114675}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.801363392965868}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 18.650014573941007}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 18.39338346105069}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 17.99394460907206}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 17.949384653009474}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 22.46348627889529}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.948209983995184}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 20.075127677991986}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 19.613351088017225}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 26.131227070000023}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 25.23966666101478}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 25.169638738036156}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.573690482182428}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.54654596094042}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 27.04506094707176}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 35.92588178999722}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 35.89134427602403}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 38.17887881118804}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 11.783776527037844}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 11.86458179494366}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.63978576194495}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.864284622017294}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.753634401829913}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 14.023887678980827}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 14.008215571055189}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.743516992079094}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results13.json b/analysis/files/results/results13.json deleted file mode 100644 index bf0632ac..00000000 --- a/analysis/files/results/results13.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 13.965540712000802}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 19.599604887887836}, "opt_eval_metrics": {"exact_match": 80.86092715231788, "f1": 88.26868699204444}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 19.6180354738608}, "opt_eval_metrics": {"exact_match": 80.87038789025544, "f1": 88.24613086360249}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.542597664985806}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.527840161928907}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 18.512789956992492}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": {"stats": {"total": 96101186, "nnz": 47671853, "linear_total": 72155136, "linear_nnz": 23757312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1420800, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1703424, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2786304, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2649600, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3124224, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2449920, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2388480, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2006016, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1910784, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1122816, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1291776, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 903168, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 50.394105437991165, "linear_sparsity": 67.07467643051771}, "speed": {"eval_elapsed_time": 17.48165646987036}, "opt_eval_metrics": {"exact_match": 80.69063386944181, "f1": 88.06386432532665}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": {"stats": {"total": 99446786, "nnz": 54738530, "linear_total": 75497472, "linear_nnz": 30818304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2502144, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2268672, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3293184, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3325440, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3780096, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3480576, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2904576, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2420736, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2440704, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1388544, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1545216, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1468416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 44.95696422004025, "linear_sparsity": 59.1796875}, "speed": {"eval_elapsed_time": 20.92965194582939}, "opt_eval_metrics": {"exact_match": 81.69347209082308, "f1": 88.72194531479171}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": {"stats": {"total": 93345986, "nnz": 42356011, "linear_total": 69402624, "linear_nnz": 18445824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1198080, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1379328, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1878528, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2090496, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2210304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1726464, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1747968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1826304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1443840, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1084416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1070592, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 789504, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 54.62471091151151, "linear_sparsity": 73.42200779036827}, "speed": {"eval_elapsed_time": 14.913816268090159}, "opt_eval_metrics": {"exact_match": 79.4228949858089, "f1": 87.22907143184382}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 15.928924348205328}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 15.946681587956846}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": {"stats": {"total": 93739586, "nnz": 40333447, "linear_total": 69795840, "linear_nnz": 16424448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 967680, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1085952, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1586688, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2013696, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1872384, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1416192, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1517568, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1645056, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1534464, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1056768, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1041408, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 686592, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 56.972876965767696, "linear_sparsity": 76.46786971830986}, "speed": {"eval_elapsed_time": 14.454034935915843}, "opt_eval_metrics": {"exact_match": 78.82686849574267, "f1": 86.75497848244157}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": {"stats": {"total": 97281986, "nnz": 45486623, "linear_total": 73334784, "linear_nnz": 21573120, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1477632, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1466880, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2388480, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2230272, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2671104, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2241024, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2088960, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1760256, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 1973760, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1271808, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1253376, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 749568, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 53.24250164876363, "linear_sparsity": 70.58269101876675}, "speed": {"eval_elapsed_time": 17.269889486022294}, "opt_eval_metrics": {"exact_match": 80.23651844843897, "f1": 87.68464122182475}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63685078, "linear_total": 84934656, "linear_nnz": 39741824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3854752, "linear_attention_total": 2359296, "linear_attention_nnz": 261808, "linear_dense_total": 4718592, "linear_dense_nnz": 3592944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4073232, "linear_attention_total": 2359296, "linear_attention_nnz": 407856, "linear_dense_total": 4718592, "linear_dense_nnz": 3665376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4218016, "linear_attention_total": 2359296, "linear_attention_nnz": 470352, "linear_dense_total": 4718592, "linear_dense_nnz": 3747664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4318192, "linear_attention_total": 2359296, "linear_attention_nnz": 586320, "linear_dense_total": 4718592, "linear_dense_nnz": 3731872}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4294272, "linear_attention_total": 2359296, "linear_attention_nnz": 598112, "linear_dense_total": 4718592, "linear_dense_nnz": 3696160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4191568, "linear_attention_total": 2359296, "linear_attention_nnz": 540976, "linear_dense_total": 4718592, "linear_dense_nnz": 3650592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4018960, "linear_attention_total": 2359296, "linear_attention_nnz": 518320, "linear_dense_total": 4718592, "linear_dense_nnz": 3500640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3692480, "linear_attention_total": 2359296, "linear_attention_nnz": 494608, "linear_dense_total": 4718592, "linear_dense_nnz": 3197872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3064736, "linear_attention_total": 2359296, "linear_attention_nnz": 381872, "linear_dense_total": 4718592, "linear_dense_nnz": 2682864}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1882688, "linear_attention_total": 2359296, "linear_attention_nnz": 281888, "linear_dense_total": 4718592, "linear_dense_nnz": 1600800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307728, "linear_attention_total": 2359296, "linear_attention_nnz": 212544, "linear_dense_total": 4718592, "linear_dense_nnz": 1095184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 825200, "linear_attention_total": 2359296, "linear_attention_nnz": 122704, "linear_dense_total": 4718592, "linear_dense_nnz": 702496}}, "total_sparsity": 41.51601184669167, "linear_sparsity": 53.208942177854944}, "speed": {"eval_elapsed_time": 33.01966134808026}, "opt_eval_metrics": {"exact_match": 80.52980132450331, "f1": 88.02284574429551}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63495382, "linear_total": 84934656, "linear_nnz": 39552208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3842976, "linear_attention_total": 2359296, "linear_attention_nnz": 258016, "linear_dense_total": 4718592, "linear_dense_nnz": 3584960}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4064144, "linear_attention_total": 2359296, "linear_attention_nnz": 404784, "linear_dense_total": 4718592, "linear_dense_nnz": 3659360}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4202080, "linear_attention_total": 2359296, "linear_attention_nnz": 460752, "linear_dense_total": 4718592, "linear_dense_nnz": 3741328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4301216, "linear_attention_total": 2359296, "linear_attention_nnz": 577184, "linear_dense_total": 4718592, "linear_dense_nnz": 3724032}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4277440, "linear_attention_total": 2359296, "linear_attention_nnz": 587792, "linear_dense_total": 4718592, "linear_dense_nnz": 3689648}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4172464, "linear_attention_total": 2359296, "linear_attention_nnz": 530480, "linear_dense_total": 4718592, "linear_dense_nnz": 3641984}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3999744, "linear_attention_total": 2359296, "linear_attention_nnz": 508336, "linear_dense_total": 4718592, "linear_dense_nnz": 3491408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3673360, "linear_attention_total": 2359296, "linear_attention_nnz": 486304, "linear_dense_total": 4718592, "linear_dense_nnz": 3187056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3043376, "linear_attention_total": 2359296, "linear_attention_nnz": 374032, "linear_dense_total": 4718592, "linear_dense_nnz": 2669344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1863968, "linear_attention_total": 2359296, "linear_attention_nnz": 276992, "linear_dense_total": 4718592, "linear_dense_nnz": 1586976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293056, "linear_attention_total": 2359296, "linear_attention_nnz": 209136, "linear_dense_total": 4718592, "linear_dense_nnz": 1083920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818384, "linear_attention_total": 2359296, "linear_attention_nnz": 120976, "linear_dense_total": 4718592, "linear_dense_nnz": 697408}}, "total_sparsity": 41.69021558428826, "linear_sparsity": 53.432191448447156}, "speed": {"eval_elapsed_time": 32.776620995020494}, "opt_eval_metrics": {"exact_match": 80.09460737937559, "f1": 87.80889686617203}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 54109530, "linear_total": 84934656, "linear_nnz": 30171936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3151120, "linear_attention_total": 2359296, "linear_attention_nnz": 172416, "linear_dense_total": 4718592, "linear_dense_nnz": 2978704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411360, "linear_attention_total": 2359296, "linear_attention_nnz": 308192, "linear_dense_total": 4718592, "linear_dense_nnz": 3103168}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3491136, "linear_attention_total": 2359296, "linear_attention_nnz": 285568, "linear_dense_total": 4718592, "linear_dense_nnz": 3205568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3603168, "linear_attention_total": 2359296, "linear_attention_nnz": 437904, "linear_dense_total": 4718592, "linear_dense_nnz": 3165264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3410880, "linear_attention_total": 2359296, "linear_attention_nnz": 321040, "linear_dense_total": 4718592, "linear_dense_nnz": 3089840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3356416, "linear_attention_total": 2359296, "linear_attention_nnz": 332784, "linear_dense_total": 4718592, "linear_dense_nnz": 3023632}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3072896, "linear_attention_total": 2359296, "linear_attention_nnz": 288464, "linear_dense_total": 4718592, "linear_dense_nnz": 2784432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2683232, "linear_attention_total": 2359296, "linear_attention_nnz": 328464, "linear_dense_total": 4718592, "linear_dense_nnz": 2354768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1957200, "linear_attention_total": 2359296, "linear_attention_nnz": 204832, "linear_dense_total": 4718592, "linear_dense_nnz": 1752368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 905552, "linear_attention_total": 2359296, "linear_attention_nnz": 189616, "linear_dense_total": 4718592, "linear_dense_nnz": 715936}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667120, "linear_attention_total": 2359296, "linear_attention_nnz": 140384, "linear_dense_total": 4718592, "linear_dense_nnz": 526736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461856, "linear_attention_total": 2359296, "linear_attention_nnz": 84608, "linear_dense_total": 4718592, "linear_dense_nnz": 377248}}, "total_sparsity": 50.30953543778212, "linear_sparsity": 64.47629575376158}, "speed": {"eval_elapsed_time": 29.022300366079435}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.31499809166372}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53891686, "linear_total": 84934656, "linear_nnz": 29954112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3130496, "linear_attention_total": 2359296, "linear_attention_nnz": 169136, "linear_dense_total": 4718592, "linear_dense_nnz": 2961360}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3393488, "linear_attention_total": 2359296, "linear_attention_nnz": 304464, "linear_dense_total": 4718592, "linear_dense_nnz": 3089024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3470880, "linear_attention_total": 2359296, "linear_attention_nnz": 279216, "linear_dense_total": 4718592, "linear_dense_nnz": 3191664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3580464, "linear_attention_total": 2359296, "linear_attention_nnz": 429728, "linear_dense_total": 4718592, "linear_dense_nnz": 3150736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3390736, "linear_attention_total": 2359296, "linear_attention_nnz": 314688, "linear_dense_total": 4718592, "linear_dense_nnz": 3076048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3334432, "linear_attention_total": 2359296, "linear_attention_nnz": 326416, "linear_dense_total": 4718592, "linear_dense_nnz": 3008016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3048464, "linear_attention_total": 2359296, "linear_attention_nnz": 281984, "linear_dense_total": 4718592, "linear_dense_nnz": 2766480}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2658992, "linear_attention_total": 2359296, "linear_attention_nnz": 320352, "linear_dense_total": 4718592, "linear_dense_nnz": 2338640}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936656, "linear_attention_total": 2359296, "linear_attention_nnz": 200608, "linear_dense_total": 4718592, "linear_dense_nnz": 1736048}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 892160, "linear_attention_total": 2359296, "linear_attention_nnz": 185008, "linear_dense_total": 4718592, "linear_dense_nnz": 707152}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660320, "linear_attention_total": 2359296, "linear_attention_nnz": 137920, "linear_dense_total": 4718592, "linear_dense_nnz": 522400}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457024, "linear_attention_total": 2359296, "linear_attention_nnz": 82480, "linear_dense_total": 4718592, "linear_dense_nnz": 374544}}, "total_sparsity": 50.50958835936713, "linear_sparsity": 64.7327564380787}, "speed": {"eval_elapsed_time": 29.01672533689998}, "opt_eval_metrics": {"exact_match": 79.06338694418164, "f1": 86.86293366416082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 23.56436571292579}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 23.61654355400242}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 48926434, "linear_total": 84934656, "linear_nnz": 25008128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2018816, "linear_attention_total": 2359296, "linear_attention_nnz": 733184, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2776064, "linear_attention_total": 2359296, "linear_attention_nnz": 1252352, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993664, "linear_attention_total": 2359296, "linear_attention_nnz": 1437696, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3162624, "linear_attention_total": 2359296, "linear_attention_nnz": 1545216, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3089408, "linear_attention_total": 2359296, "linear_attention_nnz": 1574912, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2505216, "linear_attention_total": 2359296, "linear_attention_nnz": 1370112, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026496, "linear_attention_total": 2359296, "linear_attention_nnz": 1178624, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665536, "linear_attention_total": 2359296, "linear_attention_nnz": 1190912, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 957440, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 805888, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904192, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 55.069333723048565, "linear_sparsity": 70.55603780864197}, "speed": {"eval_elapsed_time": 21.182856186991557}, "opt_eval_metrics": {"exact_match": 80.79470198675497, "f1": 88.10958975740277}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 48725622, "linear_total": 84934656, "linear_nnz": 24807424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098688, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2731008, "linear_attention_total": 2359296, "linear_attention_nnz": 1225728, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2977280, "linear_attention_total": 2359296, "linear_attention_nnz": 1433600, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3176448, "linear_attention_total": 2359296, "linear_attention_nnz": 1566720, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3081216, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2487808, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 1166336, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1617408, "linear_attention_total": 2359296, "linear_attention_nnz": 1148928, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 945664, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 787456, "linear_attention_total": 2359296, "linear_attention_nnz": 624640, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897536, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.25374562922606, "linear_sparsity": 70.79234182098766}, "speed": {"eval_elapsed_time": 21.17357637709938}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 88.07285498416482}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 48790134, "linear_total": 84934656, "linear_nnz": 24871936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2086400, "linear_attention_total": 2359296, "linear_attention_nnz": 978944, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1995264, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2738176, "linear_attention_total": 2359296, "linear_attention_nnz": 1232896, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2998784, "linear_attention_total": 2359296, "linear_attention_nnz": 1455104, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3208192, "linear_attention_total": 2359296, "linear_attention_nnz": 1598464, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3104768, "linear_attention_total": 2359296, "linear_attention_nnz": 1596416, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 1373184, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2002432, "linear_attention_total": 2359296, "linear_attention_nnz": 1165312, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1631744, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 615424, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 881152, "linear_attention_total": 2359296, "linear_attention_nnz": 403456, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.19450225287742, "linear_sparsity": 70.71638695987654}, "speed": {"eval_elapsed_time": 21.167539164889604}, "opt_eval_metrics": {"exact_match": 80.70009460737937, "f1": 88.04831949879843}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72930262, "linear_total": 84934656, "linear_nnz": 48982384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408688, "linear_attention_total": 2359296, "linear_attention_nnz": 428592, "linear_dense_total": 4718592, "linear_dense_nnz": 3980096}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4561328, "linear_attention_total": 2359296, "linear_attention_nnz": 545744, "linear_dense_total": 4718592, "linear_dense_nnz": 4015584}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4791104, "linear_attention_total": 2359296, "linear_attention_nnz": 729664, "linear_dense_total": 4718592, "linear_dense_nnz": 4061440}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4914112, "linear_attention_total": 2359296, "linear_attention_nnz": 851472, "linear_dense_total": 4718592, "linear_dense_nnz": 4062640}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5008736, "linear_attention_total": 2359296, "linear_attention_nnz": 960992, "linear_dense_total": 4718592, "linear_dense_nnz": 4047744}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4908864, "linear_attention_total": 2359296, "linear_attention_nnz": 902768, "linear_dense_total": 4718592, "linear_dense_nnz": 4006096}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4781792, "linear_attention_total": 2359296, "linear_attention_nnz": 861120, "linear_dense_total": 4718592, "linear_dense_nnz": 3920672}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4492512, "linear_attention_total": 2359296, "linear_attention_nnz": 759664, "linear_dense_total": 4718592, "linear_dense_nnz": 3732848}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4061488, "linear_attention_total": 2359296, "linear_attention_nnz": 670096, "linear_dense_total": 4718592, "linear_dense_nnz": 3391392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3105840, "linear_attention_total": 2359296, "linear_attention_nnz": 444064, "linear_dense_total": 4718592, "linear_dense_nnz": 2661776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422000, "linear_attention_total": 2359296, "linear_attention_nnz": 329968, "linear_dense_total": 4718592, "linear_dense_nnz": 2092032}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525920, "linear_attention_total": 2359296, "linear_attention_nnz": 190816, "linear_dense_total": 4718592, "linear_dense_nnz": 1335104}}, "total_sparsity": 33.025871793300276, "linear_sparsity": 42.329331386236504}, "speed": {"eval_elapsed_time": 35.11626772303134}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.58172107792693}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53211146, "linear_total": 84934656, "linear_nnz": 29278080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993984, "linear_attention_total": 2359296, "linear_attention_nnz": 241280, "linear_dense_total": 4718592, "linear_dense_nnz": 2752704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3330688, "linear_attention_total": 2359296, "linear_attention_nnz": 379584, "linear_dense_total": 4718592, "linear_dense_nnz": 2951104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3517120, "linear_attention_total": 2359296, "linear_attention_nnz": 322880, "linear_dense_total": 4718592, "linear_dense_nnz": 3194240}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720576, "linear_attention_total": 2359296, "linear_attention_nnz": 565440, "linear_dense_total": 4718592, "linear_dense_nnz": 3155136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3455168, "linear_attention_total": 2359296, "linear_attention_nnz": 390400, "linear_dense_total": 4718592, "linear_dense_nnz": 3064768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3400192, "linear_attention_total": 2359296, "linear_attention_nnz": 406592, "linear_dense_total": 4718592, "linear_dense_nnz": 2993600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2988160, "linear_attention_total": 2359296, "linear_attention_nnz": 356480, "linear_dense_total": 4718592, "linear_dense_nnz": 2631680}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2477696, "linear_attention_total": 2359296, "linear_attention_nnz": 409920, "linear_dense_total": 4718592, "linear_dense_nnz": 2067776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1612416, "linear_attention_total": 2359296, "linear_attention_nnz": 242048, "linear_dense_total": 4718592, "linear_dense_nnz": 1370368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697664, "linear_attention_total": 2359296, "linear_attention_nnz": 224896, "linear_dense_total": 4718592, "linear_dense_nnz": 472768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591360, "linear_attention_total": 2359296, "linear_attention_nnz": 172352, "linear_dense_total": 4718592, "linear_dense_nnz": 419008}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 493056, "linear_attention_total": 2359296, "linear_attention_nnz": 104768, "linear_dense_total": 4718592, "linear_dense_nnz": 388288}}, "total_sparsity": 51.13454941064908, "linear_sparsity": 65.52870008680556}, "speed": {"eval_elapsed_time": 26.50232954812236}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.75922108224064}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 29.429046569159254}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 28.692298884037882}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 28.704244010150433}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 33.08102096617222}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 33.090760480146855}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63788226, "linear_total": 84934656, "linear_nnz": 39853312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3867392, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 3448576}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250112, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 3696384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519424, "linear_attention_total": 2359296, "linear_attention_nnz": 562432, "linear_dense_total": 4718592, "linear_dense_nnz": 3956992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4809728, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 3982336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4702976, "linear_attention_total": 2359296, "linear_attention_nnz": 790016, "linear_dense_total": 4718592, "linear_dense_nnz": 3912960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4601344, "linear_attention_total": 2359296, "linear_attention_nnz": 701696, "linear_dense_total": 4718592, "linear_dense_nnz": 3899648}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 667392, "linear_dense_total": 4718592, "linear_dense_nnz": 3576064}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3669248, "linear_attention_total": 2359296, "linear_attention_nnz": 700416, "linear_dense_total": 4718592, "linear_dense_nnz": 2968832}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2404096, "linear_attention_total": 2359296, "linear_attention_nnz": 437504, "linear_dense_total": 4718592, "linear_dense_nnz": 1966592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1054976, "linear_attention_total": 2359296, "linear_attention_nnz": 361472, "linear_dense_total": 4718592, "linear_dense_nnz": 693504}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946176, "linear_attention_total": 2359296, "linear_attention_nnz": 291584, "linear_dense_total": 4718592, "linear_dense_nnz": 654592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 168960, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 41.42128782970864, "linear_sparsity": 53.077678915895056}, "speed": {"eval_elapsed_time": 26.95584986009635}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.51569063636161}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 63672482, "linear_total": 84934656, "linear_nnz": 39737600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3861248, "linear_attention_total": 2359296, "linear_attention_nnz": 416256, "linear_dense_total": 4718592, "linear_dense_nnz": 3444992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250368, "linear_attention_total": 2359296, "linear_attention_nnz": 541952, "linear_dense_total": 4718592, "linear_dense_nnz": 3708416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4532736, "linear_attention_total": 2359296, "linear_attention_nnz": 555520, "linear_dense_total": 4718592, "linear_dense_nnz": 3977216}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822784, "linear_attention_total": 2359296, "linear_attention_nnz": 802816, "linear_dense_total": 4718592, "linear_dense_nnz": 4019968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4714240, "linear_attention_total": 2359296, "linear_attention_nnz": 774400, "linear_dense_total": 4718592, "linear_dense_nnz": 3939840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4584192, "linear_attention_total": 2359296, "linear_attention_nnz": 686592, "linear_dense_total": 4718592, "linear_dense_nnz": 3897600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4225024, "linear_attention_total": 2359296, "linear_attention_nnz": 656384, "linear_dense_total": 4718592, "linear_dense_nnz": 3568640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3634176, "linear_attention_total": 2359296, "linear_attention_nnz": 676864, "linear_dense_total": 4718592, "linear_dense_nnz": 2957312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 432640, "linear_dense_total": 4718592, "linear_dense_nnz": 1931264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030400, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933120, "linear_attention_total": 2359296, "linear_attention_nnz": 285184, "linear_dense_total": 4718592, "linear_dense_nnz": 647936}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785408, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 611328}}, "total_sparsity": 41.52757914531035, "linear_sparsity": 53.213915412808646}, "speed": {"eval_elapsed_time": 26.93265108484775}, "opt_eval_metrics": {"exact_match": 79.55534531693472, "f1": 87.439750439335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63651698, "linear_total": 84934656, "linear_nnz": 39716864, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3864832, "linear_attention_total": 2359296, "linear_attention_nnz": 417024, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4246016, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 3703296}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4537600, "linear_attention_total": 2359296, "linear_attention_nnz": 555776, "linear_dense_total": 4718592, "linear_dense_nnz": 3981824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4824576, "linear_attention_total": 2359296, "linear_attention_nnz": 810240, "linear_dense_total": 4718592, "linear_dense_nnz": 4014336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4704768, "linear_attention_total": 2359296, "linear_attention_nnz": 764160, "linear_dense_total": 4718592, "linear_dense_nnz": 3940608}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4590080, "linear_attention_total": 2359296, "linear_attention_nnz": 685824, "linear_dense_total": 4718592, "linear_dense_nnz": 3904256}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219136, "linear_attention_total": 2359296, "linear_attention_nnz": 647680, "linear_dense_total": 4718592, "linear_dense_nnz": 3571456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640576, "linear_attention_total": 2359296, "linear_attention_nnz": 684288, "linear_dense_total": 4718592, "linear_dense_nnz": 2956288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2360064, "linear_attention_total": 2359296, "linear_attention_nnz": 427264, "linear_dense_total": 4718592, "linear_dense_nnz": 1932800}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033472, "linear_attention_total": 2359296, "linear_attention_nnz": 350976, "linear_dense_total": 4718592, "linear_dense_nnz": 682496}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 920832, "linear_attention_total": 2359296, "linear_attention_nnz": 273408, "linear_dense_total": 4718592, "linear_dense_nnz": 647424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 774912, "linear_attention_total": 2359296, "linear_attention_nnz": 166400, "linear_dense_total": 4718592, "linear_dense_nnz": 608512}}, "total_sparsity": 41.546665739029805, "linear_sparsity": 53.238329475308646}, "speed": {"eval_elapsed_time": 26.93877486907877}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.29496050765553}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 38271273, "linear_total": 84934656, "linear_nnz": 14360064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 991744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 965120, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 216576}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487360, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 1389568, "linear_dense_total": 4718592, "linear_dense_nnz": 377856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 1449984, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1713664, "linear_attention_total": 2359296, "linear_attention_nnz": 1349632, "linear_dense_total": 4718592, "linear_dense_nnz": 364032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1481216, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1190400, "linear_attention_total": 2359296, "linear_attention_nnz": 964608, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191424, "linear_attention_total": 2359296, "linear_attention_nnz": 1063936, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708608, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556544, "linear_attention_total": 2359296, "linear_attention_nnz": 502784, "linear_dense_total": 4718592, "linear_dense_nnz": 53760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 441856, "linear_attention_total": 2359296, "linear_attention_nnz": 360448, "linear_dense_total": 4718592, "linear_dense_nnz": 81408}}, "total_sparsity": 64.85429951512302, "linear_sparsity": 83.0928096064815}, "speed": {"eval_elapsed_time": 17.621023153187707}, "opt_eval_metrics": {"exact_match": 78.67549668874172, "f1": 86.51098653495667}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 30.383016001898795}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 30.506126267835498}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 28.065979121020064}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 28.04132828908041}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 28.038834661012515}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 29.592536952113733}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 29.660654196050018}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 29.750202575000003}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36724619, "linear_total": 84934656, "linear_nnz": 12816896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933888, "linear_attention_total": 2359296, "linear_attention_nnz": 522240, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1116160, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1374720, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692160, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1659392, "linear_attention_total": 2359296, "linear_attention_nnz": 825344, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1416192, "linear_attention_total": 2359296, "linear_attention_nnz": 672768, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1207296, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1235456, "linear_attention_total": 2359296, "linear_attention_nnz": 785408, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 514048, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455168, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 421888, "linear_attention_total": 2359296, "linear_attention_nnz": 222208, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.2746399944621, "linear_sparsity": 84.9096981095679}, "speed": {"eval_elapsed_time": 15.043476368067786}, "opt_eval_metrics": {"exact_match": 78.3349101229896, "f1": 86.4116267700138}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 36711275, "linear_total": 84934656, "linear_nnz": 12803584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 930816, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 536576, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1366528, "linear_attention_total": 2359296, "linear_attention_nnz": 667648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1681920, "linear_attention_total": 2359296, "linear_attention_nnz": 967680, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1412096, "linear_attention_total": 2359296, "linear_attention_nnz": 668672, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1221632, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1237504, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757760, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443904, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 425984, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.28689420474849, "linear_sparsity": 84.92537133487654}, "speed": {"eval_elapsed_time": 15.047897994983941}, "opt_eval_metrics": {"exact_match": 78.37275307473983, "f1": 86.39441106336629}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72403618, "linear_total": 84934656, "linear_nnz": 48458624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451008, "linear_attention_total": 2359296, "linear_attention_nnz": 446336, "linear_dense_total": 4718592, "linear_dense_nnz": 4004672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674880, "linear_attention_total": 2359296, "linear_attention_nnz": 597248, "linear_dense_total": 4718592, "linear_dense_nnz": 4077632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4954368, "linear_attention_total": 2359296, "linear_attention_nnz": 799296, "linear_dense_total": 4718592, "linear_dense_nnz": 4155072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5115648, "linear_attention_total": 2359296, "linear_attention_nnz": 950208, "linear_dense_total": 4718592, "linear_dense_nnz": 4165440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5174848, "linear_attention_total": 2359296, "linear_attention_nnz": 1022400, "linear_dense_total": 4718592, "linear_dense_nnz": 4152448}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5020992, "linear_attention_total": 2359296, "linear_attention_nnz": 914368, "linear_dense_total": 4718592, "linear_dense_nnz": 4106624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4879296, "linear_attention_total": 2359296, "linear_attention_nnz": 918208, "linear_dense_total": 4718592, "linear_dense_nnz": 3961088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4517696, "linear_attention_total": 2359296, "linear_attention_nnz": 832704, "linear_dense_total": 4718592, "linear_dense_nnz": 3684992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3922688, "linear_attention_total": 2359296, "linear_attention_nnz": 715648, "linear_dense_total": 4718592, "linear_dense_nnz": 3207040}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584512, "linear_attention_total": 2359296, "linear_attention_nnz": 467072, "linear_dense_total": 4718592, "linear_dense_nnz": 2117440}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879872, "linear_attention_total": 2359296, "linear_attention_nnz": 362688, "linear_dense_total": 4718592, "linear_dense_nnz": 1517184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282816, "linear_attention_total": 2359296, "linear_attention_nnz": 218432, "linear_dense_total": 4718592, "linear_dense_nnz": 1064384}}, "total_sparsity": 33.50950536060172, "linear_sparsity": 42.94599368248457}, "speed": {"eval_elapsed_time": 32.87611435819417}, "opt_eval_metrics": {"exact_match": 81.10690633869442, "f1": 88.3744311515211}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72395170, "linear_total": 84934656, "linear_nnz": 48450176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4450944, "linear_attention_total": 2359296, "linear_attention_nnz": 446080, "linear_dense_total": 4718592, "linear_dense_nnz": 4004864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674240, "linear_attention_total": 2359296, "linear_attention_nnz": 597312, "linear_dense_total": 4718592, "linear_dense_nnz": 4076928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4955648, "linear_attention_total": 2359296, "linear_attention_nnz": 800192, "linear_dense_total": 4718592, "linear_dense_nnz": 4155456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5114624, "linear_attention_total": 2359296, "linear_attention_nnz": 948864, "linear_dense_total": 4718592, "linear_dense_nnz": 4165760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5171840, "linear_attention_total": 2359296, "linear_attention_nnz": 1019200, "linear_dense_total": 4718592, "linear_dense_nnz": 4152640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5023808, "linear_attention_total": 2359296, "linear_attention_nnz": 915392, "linear_dense_total": 4718592, "linear_dense_nnz": 4108416}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4876544, "linear_attention_total": 2359296, "linear_attention_nnz": 916160, "linear_dense_total": 4718592, "linear_dense_nnz": 3960384}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519232, "linear_attention_total": 2359296, "linear_attention_nnz": 834176, "linear_dense_total": 4718592, "linear_dense_nnz": 3685056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3921792, "linear_attention_total": 2359296, "linear_attention_nnz": 713856, "linear_dense_total": 4718592, "linear_dense_nnz": 3207936}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2581056, "linear_attention_total": 2359296, "linear_attention_nnz": 465600, "linear_dense_total": 4718592, "linear_dense_nnz": 2115456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879424, "linear_attention_total": 2359296, "linear_attention_nnz": 362048, "linear_dense_total": 4718592, "linear_dense_nnz": 1517376}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 217216, "linear_dense_total": 4718592, "linear_dense_nnz": 1063808}}, "total_sparsity": 33.51726342179023, "linear_sparsity": 42.95594015239198}, "speed": {"eval_elapsed_time": 32.87603668309748}, "opt_eval_metrics": {"exact_match": 80.82308420056765, "f1": 88.21300800880684}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 44702229, "linear_total": 84934656, "linear_nnz": 20786688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1582592, "linear_attention_total": 2359296, "linear_attention_nnz": 1055744, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190848, "linear_attention_total": 2359296, "linear_attention_nnz": 1316864, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2420736, "linear_attention_total": 2359296, "linear_attention_nnz": 1468416, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2697728, "linear_attention_total": 2359296, "linear_attention_nnz": 1651712, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2603008, "linear_attention_total": 2359296, "linear_attention_nnz": 1616896, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102272, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 1265664, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 1212416, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863232, "linear_attention_total": 2359296, "linear_attention_nnz": 749568, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 652288, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682496, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 58.94855257518133, "linear_sparsity": 75.52625868055556}, "speed": {"eval_elapsed_time": 19.962007428053766}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 88.06903108265608}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72878482, "linear_total": 84934656, "linear_nnz": 48937216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4586496, "linear_attention_total": 2359296, "linear_attention_nnz": 517888, "linear_dense_total": 4718592, "linear_dense_nnz": 4068608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4844288, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 4202752}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5155328, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 4313856}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5409024, "linear_attention_total": 2359296, "linear_attention_nnz": 1072896, "linear_dense_total": 4718592, "linear_dense_nnz": 4336128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5385984, "linear_attention_total": 2359296, "linear_attention_nnz": 1068800, "linear_dense_total": 4718592, "linear_dense_nnz": 4317184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5272832, "linear_attention_total": 2359296, "linear_attention_nnz": 961792, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5128448, "linear_attention_total": 2359296, "linear_attention_nnz": 986880, "linear_dense_total": 4718592, "linear_dense_nnz": 4141568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4725504, "linear_attention_total": 2359296, "linear_attention_nnz": 905472, "linear_dense_total": 4718592, "linear_dense_nnz": 3820032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3841792, "linear_attention_total": 2359296, "linear_attention_nnz": 756224, "linear_dense_total": 4718592, "linear_dense_nnz": 3085568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879808, "linear_attention_total": 2359296, "linear_attention_nnz": 463360, "linear_dense_total": 4718592, "linear_dense_nnz": 1416448}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 415488, "linear_dense_total": 4718592, "linear_dense_nnz": 1090304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1201920, "linear_attention_total": 2359296, "linear_attention_nnz": 254720, "linear_dense_total": 4718592, "linear_dense_nnz": 947200}}, "total_sparsity": 33.07342297799975, "linear_sparsity": 42.38251109182099}, "speed": {"eval_elapsed_time": 30.725059562828392}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.34112193061533}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34772839, "linear_total": 84934656, "linear_nnz": 10866176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732160, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835072, "linear_attention_total": 2359296, "linear_attention_nnz": 535552, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1128960, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1551872, "linear_attention_total": 2359296, "linear_attention_nnz": 1111040, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1389056, "linear_attention_total": 2359296, "linear_attention_nnz": 892928, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1096704, "linear_attention_total": 2359296, "linear_attention_nnz": 663552, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1000448, "linear_attention_total": 2359296, "linear_attention_nnz": 662528, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 801792, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 803328, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 498688, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 422912, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 336384, "linear_attention_total": 2359296, "linear_attention_nnz": 239616, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 68.06702028169144, "linear_sparsity": 87.20642843364197}, "speed": {"eval_elapsed_time": 14.562878740951419}, "opt_eval_metrics": {"exact_match": 77.8240302743614, "f1": 86.11992485005756}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53223538, "linear_total": 84934656, "linear_nnz": 29295872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2685696, "linear_attention_total": 2359296, "linear_attention_nnz": 331008, "linear_dense_total": 4718592, "linear_dense_nnz": 2354688}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3258624, "linear_attention_total": 2359296, "linear_attention_nnz": 432384, "linear_dense_total": 4718592, "linear_dense_nnz": 2826240}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3726080, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 3302144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3917568, "linear_attention_total": 2359296, "linear_attention_nnz": 669440, "linear_dense_total": 4718592, "linear_dense_nnz": 3248128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3647232, "linear_attention_total": 2359296, "linear_attention_nnz": 453632, "linear_dense_total": 4718592, "linear_dense_nnz": 3193600}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3593472, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 3119616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2939648, "linear_attention_total": 2359296, "linear_attention_nnz": 445952, "linear_dense_total": 4718592, "linear_dense_nnz": 2493696}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2381824, "linear_attention_total": 2359296, "linear_attention_nnz": 490752, "linear_dense_total": 4718592, "linear_dense_nnz": 1891072}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384448, "linear_attention_total": 2359296, "linear_attention_nnz": 275712, "linear_dense_total": 4718592, "linear_dense_nnz": 1108736}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 348928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 618752, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 415744}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 535296, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 423168}}, "total_sparsity": 51.12316945157615, "linear_sparsity": 65.5077522183642}, "speed": {"eval_elapsed_time": 23.845138414064422}, "opt_eval_metrics": {"exact_match": 78.11731315042573, "f1": 86.14927876930865}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 35.13715321500786}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34069864, "linear_total": 84934656, "linear_nnz": 10163200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 674816, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1137664, "linear_attention_total": 2359296, "linear_attention_nnz": 937984, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1458176, "linear_attention_total": 2359296, "linear_attention_nnz": 1193984, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1335808, "linear_attention_total": 2359296, "linear_attention_nnz": 1057792, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 843264, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 948736, "linear_attention_total": 2359296, "linear_attention_nnz": 759808, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 979456, "linear_attention_total": 2359296, "linear_attention_nnz": 830464, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833536, "linear_attention_total": 2359296, "linear_attention_nnz": 753664, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 478208, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 46080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 432128, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 36864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290816, "linear_attention_total": 2359296, "linear_attention_nnz": 238592, "linear_dense_total": 4718592, "linear_dense_nnz": 52224}}, "total_sparsity": 68.71258409134985, "linear_sparsity": 88.03409529320987}, "speed": {"eval_elapsed_time": 15.140548604074866}, "opt_eval_metrics": {"exact_match": 76.9914853358562, "f1": 85.26341062121247}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 62877338, "linear_total": 84934656, "linear_nnz": 38938240, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3827456, "linear_attention_total": 2359296, "linear_attention_nnz": 326336, "linear_dense_total": 4718592, "linear_dense_nnz": 3501120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4141120, "linear_attention_total": 2359296, "linear_attention_nnz": 487552, "linear_dense_total": 4718592, "linear_dense_nnz": 3653568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4289088, "linear_attention_total": 2359296, "linear_attention_nnz": 487616, "linear_dense_total": 4718592, "linear_dense_nnz": 3801472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4512896, "linear_attention_total": 2359296, "linear_attention_nnz": 712832, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4390144, "linear_attention_total": 2359296, "linear_attention_nnz": 646272, "linear_dense_total": 4718592, "linear_dense_nnz": 3743872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4316928, "linear_attention_total": 2359296, "linear_attention_nnz": 625600, "linear_dense_total": 4718592, "linear_dense_nnz": 3691328}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4036864, "linear_attention_total": 2359296, "linear_attention_nnz": 575808, "linear_dense_total": 4718592, "linear_dense_nnz": 3461056}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592320, "linear_attention_total": 2359296, "linear_attention_nnz": 579392, "linear_dense_total": 4718592, "linear_dense_nnz": 3012928}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2753408, "linear_attention_total": 2359296, "linear_attention_nnz": 405632, "linear_dense_total": 4718592, "linear_dense_nnz": 2347776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1318784, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 1001344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 994816, "linear_attention_total": 2359296, "linear_attention_nnz": 238208, "linear_dense_total": 4718592, "linear_dense_nnz": 756608}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764416, "linear_attention_total": 2359296, "linear_attention_nnz": 141568, "linear_dense_total": 4718592, "linear_dense_nnz": 622848}}, "total_sparsity": 42.257784614732465, "linear_sparsity": 54.1550624517747}, "speed": {"eval_elapsed_time": 29.41211991594173}, "opt_eval_metrics": {"exact_match": 80.05676442762535, "f1": 87.66615713942541}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42128141, "linear_total": 84934656, "linear_nnz": 18215424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277440, "linear_attention_total": 2359296, "linear_attention_nnz": 643072, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539584, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068480, "linear_attention_total": 2359296, "linear_attention_nnz": 1051648, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2334208, "linear_attention_total": 2359296, "linear_attention_nnz": 1257472, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2473984, "linear_attention_total": 2359296, "linear_attention_nnz": 1315840, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2078208, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1820160, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1555456, "linear_attention_total": 2359296, "linear_attention_nnz": 925696, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 663040, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576000, "linear_attention_total": 2359296, "linear_attention_nnz": 463872, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 61.31241765669342, "linear_sparsity": 78.55360243055556}, "speed": {"eval_elapsed_time": 17.672173040919006}, "opt_eval_metrics": {"exact_match": 79.66887417218543, "f1": 87.3881230572442}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39533983, "linear_total": 84934656, "linear_nnz": 15622656, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107968, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 720896, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717760, "linear_attention_total": 2359296, "linear_attention_nnz": 1098752, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967104, "linear_attention_total": 2359296, "linear_attention_nnz": 1309696, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2067968, "linear_attention_total": 2359296, "linear_attention_nnz": 1362944, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742336, "linear_attention_total": 2359296, "linear_attention_nnz": 1074176, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565696, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1342464, "linear_attention_total": 2359296, "linear_attention_nnz": 958464, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1153536, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729088, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 551936, "linear_attention_total": 2359296, "linear_attention_nnz": 478208, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 63.694713643514845, "linear_sparsity": 81.6062644675926}, "speed": {"eval_elapsed_time": 17.396596929989755}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.14755939306319}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42626625, "linear_total": 84934656, "linear_nnz": 18712064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1420800, "linear_attention_total": 2359296, "linear_attention_nnz": 1210368, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1381888, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013184, "linear_attention_total": 2359296, "linear_attention_nnz": 1500160, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1526784, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395136, "linear_attention_total": 2359296, "linear_attention_nnz": 1734656, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211328, "linear_attention_total": 2359296, "linear_attention_nnz": 1659904, "linear_dense_total": 4718592, "linear_dense_nnz": 551424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943040, "linear_attention_total": 2359296, "linear_attention_nnz": 1486848, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590784, "linear_attention_total": 2359296, "linear_attention_nnz": 1254400, "linear_dense_total": 4718592, "linear_dense_nnz": 336384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1441280, "linear_attention_total": 2359296, "linear_attention_nnz": 1267712, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 837632, "linear_attention_total": 2359296, "linear_attention_nnz": 760832, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 712704, "linear_dense_total": 4718592, "linear_dense_nnz": 69120}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 580096, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 136704}}, "total_sparsity": 60.85464429335368, "linear_sparsity": 77.96887056327161}, "speed": {"eval_elapsed_time": 19.82656983099878}, "opt_eval_metrics": {"exact_match": 80.10406811731315, "f1": 87.56487698206614}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 27.474724027095363}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.814206156879663}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 32.3695623409003}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 17.91969774197787}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.6927186998073}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.150802633957937}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.863338297931477}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.57372920983471}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.812317132018507}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.71152348187752}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.398823922965676}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.785655258921906}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 28.275199214927852}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.337535696104169}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 33.620146826142445}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 33.69571442203596}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 30.31329287402332}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 27.85703947697766}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 27.81183459307067}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 27.788447924889624}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 17.21582882408984}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 17.197634894168004}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.265181887894869}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.085542490938678}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.512992850970477}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 18.717300809919834}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.605645736912265}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 41.11115196393803}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 40.64612381509505}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 25.710868231020868}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 25.134117686888203}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 24.50548317306675}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 21.893441491993144}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 21.611296633956954}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 21.071897589135915}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 16.06849605194293}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.895570316817611}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 40.132621727185324}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.20940860803239}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.07671318203211}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.641912897117436}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.640617809956893}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 25.115268317982554}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 81807426, "linear_total": 84934656, "linear_nnz": 57862144, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5237760, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 4316160}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5140480, "linear_attention_total": 2359296, "linear_attention_nnz": 829440, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5641216, "linear_attention_total": 2359296, "linear_attention_nnz": 1221632, "linear_dense_total": 4718592, "linear_dense_nnz": 4419584}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5816320, "linear_attention_total": 2359296, "linear_attention_nnz": 1386496, "linear_dense_total": 4718592, "linear_dense_nnz": 4429824}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5997568, "linear_attention_total": 2359296, "linear_attention_nnz": 1540096, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5968896, "linear_attention_total": 2359296, "linear_attention_nnz": 1548288, "linear_dense_total": 4718592, "linear_dense_nnz": 4420608}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5685248, "linear_attention_total": 2359296, "linear_attention_nnz": 1364992, "linear_dense_total": 4718592, "linear_dense_nnz": 4320256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5458944, "linear_attention_total": 2359296, "linear_attention_nnz": 1272832, "linear_dense_total": 4718592, "linear_dense_nnz": 4186112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 1173504, "linear_dense_total": 4718592, "linear_dense_nnz": 3787776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3566592, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 2839552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 671744, "linear_dense_total": 4718592, "linear_dense_nnz": 2001920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 409600, "linear_dense_total": 4718592, "linear_dense_nnz": 1304576}}, "total_sparsity": 24.873695953757846, "linear_sparsity": 31.87451774691358}, "speed": {"eval_elapsed_time": 32.08257991797291}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.73698799207777}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 81295202, "linear_total": 84934656, "linear_nnz": 57351168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5262336, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5235712, "linear_attention_total": 2359296, "linear_attention_nnz": 771072, "linear_dense_total": 4718592, "linear_dense_nnz": 4464640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5669888, "linear_attention_total": 2359296, "linear_attention_nnz": 1152000, "linear_dense_total": 4718592, "linear_dense_nnz": 4517888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5875712, "linear_attention_total": 2359296, "linear_attention_nnz": 1312768, "linear_dense_total": 4718592, "linear_dense_nnz": 4562944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6056960, "linear_attention_total": 2359296, "linear_attention_nnz": 1501184, "linear_dense_total": 4718592, "linear_dense_nnz": 4555776}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5898240, "linear_attention_total": 2359296, "linear_attention_nnz": 1377280, "linear_dense_total": 4718592, "linear_dense_nnz": 4520960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5831680, "linear_attention_total": 2359296, "linear_attention_nnz": 1357824, "linear_dense_total": 4718592, "linear_dense_nnz": 4473856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5483520, "linear_attention_total": 2359296, "linear_attention_nnz": 1192960, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4958208, "linear_attention_total": 2359296, "linear_attention_nnz": 1069056, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3236864, "linear_attention_total": 2359296, "linear_attention_nnz": 718848, "linear_dense_total": 4718592, "linear_dense_nnz": 2518016}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2222080, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1607680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1619968, "linear_attention_total": 2359296, "linear_attention_nnz": 389120, "linear_dense_total": 4718592, "linear_dense_nnz": 1230848}}, "total_sparsity": 25.344087186502197, "linear_sparsity": 32.47612847222222}, "speed": {"eval_elapsed_time": 31.65403198893182}, "opt_eval_metrics": {"exact_match": 81.51371807000946, "f1": 88.67903677006836}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.754389239940792}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 12.991677745943889}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.87139375694096}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.044030340854079}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.875461397925392}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.863983491901308}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.856388033833355}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.372085305862129}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.856825530063361}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.743963541928679}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.430293028010055}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 25.971711199032143}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.440584641881287}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 13.018812068970874}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.96851964481175}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.973318020114675}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.801363392965868}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 18.650014573941007}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 18.39338346105069}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 17.99394460907206}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 17.949384653009474}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 22.46348627889529}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.948209983995184}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 20.075127677991986}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 19.613351088017225}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 26.131227070000023}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 25.23966666101478}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 25.169638738036156}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.573690482182428}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.54654596094042}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 27.04506094707176}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 35.92588178999722}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 35.89134427602403}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 38.17887881118804}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 11.783776527037844}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 11.86458179494366}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.63978576194495}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.864284622017294}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.753634401829913}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 14.023887678980827}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 14.008215571055189}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.743516992079094}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": {"stats": {"total": 334094338, "nnz": 68649433, "linear_total": 301989888, "linear_nnz": 36684800, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1167360, "linear_attention_total": 4194304, "linear_attention_nnz": 974848, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 576512, "linear_attention_total": 4194304, "linear_attention_nnz": 306176, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 584704, "linear_attention_total": 4194304, "linear_attention_nnz": 297984, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1248256, "linear_attention_total": 4194304, "linear_attention_nnz": 834560, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 848896, "linear_attention_total": 4194304, "linear_attention_nnz": 381952, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 959488, "linear_attention_total": 4194304, "linear_attention_nnz": 406528, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1130496, "linear_attention_total": 4194304, "linear_attention_nnz": 522240, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 771072, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 414720, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1839104, "linear_attention_total": 4194304, "linear_attention_nnz": 1091584, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2709504, "linear_attention_total": 4194304, "linear_attention_nnz": 1714176, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2908160, "linear_attention_total": 4194304, "linear_attention_nnz": 1875968, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3074048, "linear_attention_total": 4194304, "linear_attention_nnz": 1832960, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3335168, "linear_attention_total": 4194304, "linear_attention_nnz": 2155520, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2851840, "linear_attention_total": 4194304, "linear_attention_nnz": 1942528, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2761728, "linear_attention_total": 4194304, "linear_attention_nnz": 2079744, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2316288, "linear_attention_total": 4194304, "linear_attention_nnz": 1843200, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1950720, "linear_attention_total": 4194304, "linear_attention_nnz": 1582080, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1757184, "linear_attention_total": 4194304, "linear_attention_nnz": 1435648, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 988160, "linear_attention_total": 4194304, "linear_attention_nnz": 717824, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 447488, "linear_attention_total": 4194304, "linear_attention_nnz": 334848, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 436224, "linear_attention_total": 4194304, "linear_attention_nnz": 358400, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 214016, "linear_attention_total": 4194304, "linear_attention_nnz": 134144, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 293888, "linear_attention_total": 4194304, "linear_attention_nnz": 111616, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.45208128609471, "linear_sparsity": 87.85230848524306}, "speed": {"eval_elapsed_time": 38.00938259717077}, "opt_eval_metrics": {"exact_match": 82.33680227057711, "f1": 89.04761607630476}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": {"stats": {"total": 334094338, "nnz": 68429014, "linear_total": 301989888, "linear_nnz": 36464640, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1181696, "linear_attention_total": 4194304, "linear_attention_nnz": 989184, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 593920, "linear_attention_total": 4194304, "linear_attention_nnz": 323584, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 573440, "linear_attention_total": 4194304, "linear_attention_nnz": 286720, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1215488, "linear_attention_total": 4194304, "linear_attention_nnz": 801792, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 863232, "linear_attention_total": 4194304, "linear_attention_nnz": 396288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 405504, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1124352, "linear_attention_total": 4194304, "linear_attention_nnz": 520192, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1203200, "linear_attention_total": 4194304, "linear_attention_nnz": 764928, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1083392, "linear_attention_total": 4194304, "linear_attention_nnz": 423936, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1817600, "linear_attention_total": 4194304, "linear_attention_nnz": 1070080, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2741248, "linear_attention_total": 4194304, "linear_attention_nnz": 1745920, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2934784, "linear_attention_total": 4194304, "linear_attention_nnz": 1902592, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3023872, "linear_attention_total": 4194304, "linear_attention_nnz": 1782784, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3326976, "linear_attention_total": 4194304, "linear_attention_nnz": 2147328, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2827264, "linear_attention_total": 4194304, "linear_attention_nnz": 1917952, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2731008, "linear_attention_total": 4194304, "linear_attention_nnz": 2049024, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2293760, "linear_attention_total": 4194304, "linear_attention_nnz": 1820672, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1931264, "linear_attention_total": 4194304, "linear_attention_nnz": 1562624, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1712128, "linear_attention_total": 4194304, "linear_attention_nnz": 1390592, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.51805636406804, "linear_sparsity": 87.92521158854166}, "speed": {"eval_elapsed_time": 37.733626022934914}, "opt_eval_metrics": {"exact_match": 82.13812677388836, "f1": 89.03656646065757}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": {"stats": {"total": 334094338, "nnz": 68456822, "linear_total": 301989888, "linear_nnz": 36492288, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1171456, "linear_attention_total": 4194304, "linear_attention_nnz": 978944, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 589824, "linear_attention_total": 4194304, "linear_attention_nnz": 319488, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 580608, "linear_attention_total": 4194304, "linear_attention_nnz": 293888, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 795648, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 862208, "linear_attention_total": 4194304, "linear_attention_nnz": 395264, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 945152, "linear_attention_total": 4194304, "linear_attention_nnz": 392192, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1127424, "linear_attention_total": 4194304, "linear_attention_nnz": 523264, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1222656, "linear_attention_total": 4194304, "linear_attention_nnz": 784384, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 416768, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1816576, "linear_attention_total": 4194304, "linear_attention_nnz": 1069056, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2749440, "linear_attention_total": 4194304, "linear_attention_nnz": 1754112, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2954240, "linear_attention_total": 4194304, "linear_attention_nnz": 1922048, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3016704, "linear_attention_total": 4194304, "linear_attention_nnz": 1775616, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3329024, "linear_attention_total": 4194304, "linear_attention_nnz": 2149376, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2864128, "linear_attention_total": 4194304, "linear_attention_nnz": 1954816, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2747392, "linear_attention_total": 4194304, "linear_attention_nnz": 2065408, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2296832, "linear_attention_total": 4194304, "linear_attention_nnz": 1823744, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1927168, "linear_attention_total": 4194304, "linear_attention_nnz": 1558528, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1678336, "linear_attention_total": 4194304, "linear_attention_nnz": 1356800, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.50973296650122, "linear_sparsity": 87.91605631510416}, "speed": {"eval_elapsed_time": 37.993687882088125}, "opt_eval_metrics": {"exact_match": 82.30842005676443, "f1": 89.04987146464723}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results14.json b/analysis/files/results/results14.json deleted file mode 100644 index e5029609..00000000 --- a/analysis/files/results/results14.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 20.51507593272254, "cuda_eval_elapsed_time": 13.414341842651368}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.5463269171305, "cuda_eval_elapsed_time": 19.462684043884277}, "opt_eval_metrics": {"exact_match": 80.86092715231788, "f1": 88.26868699204444}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.70568129932508, "cuda_eval_elapsed_time": 19.503536235809328}, "opt_eval_metrics": {"exact_match": 80.87038789025544, "f1": 88.24613086360249}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-20000": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.695561807602644, "cuda_eval_elapsed_time": 37.43465453338623}, "opt_eval_metrics": {"exact_match": 81.78807947019868, "f1": 88.89084139605751}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-22132": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.85223976522684, "cuda_eval_elapsed_time": 37.58739047241211}, "opt_eval_metrics": {"exact_match": 81.67455061494796, "f1": 88.83202816693091}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.723125794902444, "cuda_eval_elapsed_time": 18.497972122192383}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.615046767052263, "cuda_eval_elapsed_time": 18.414370426177978}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.64635610114783, "cuda_eval_elapsed_time": 18.39916780090332}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": {"stats": {"total": 96101186, "nnz": 47671853, "linear_total": 72155136, "linear_nnz": 23757312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1420800, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1703424, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2786304, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2649600, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3124224, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2449920, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2388480, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2006016, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1910784, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1122816, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1291776, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 903168, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 50.394105437991165, "linear_sparsity": 67.07467643051771}, "speed": {"eval_elapsed_time": 24.566468104720116, "cuda_eval_elapsed_time": 17.369024868011476}, "opt_eval_metrics": {"exact_match": 80.69063386944181, "f1": 88.06386432532665}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-20000": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.92327093007043, "cuda_eval_elapsed_time": 37.62901539611816}, "opt_eval_metrics": {"exact_match": 81.83538315988648, "f1": 88.91108458489386}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-22132": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.89120363816619, "cuda_eval_elapsed_time": 37.62109769439697}, "opt_eval_metrics": {"exact_match": 81.64616840113528, "f1": 88.81401461448195}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": {"stats": {"total": 99446786, "nnz": 54738530, "linear_total": 75497472, "linear_nnz": 30818304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2502144, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2268672, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3293184, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3325440, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3780096, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3480576, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2904576, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2420736, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2440704, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1388544, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1545216, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1468416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 44.95696422004025, "linear_sparsity": 59.1796875}, "speed": {"eval_elapsed_time": 28.055892461910844, "cuda_eval_elapsed_time": 20.844706882476807}, "opt_eval_metrics": {"exact_match": 81.69347209082308, "f1": 88.72194531479171}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": {"stats": {"total": 93345986, "nnz": 42356011, "linear_total": 69402624, "linear_nnz": 18445824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1198080, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1379328, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1878528, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2090496, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2210304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1726464, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1747968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1826304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1443840, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1084416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1070592, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 789504, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 54.62471091151151, "linear_sparsity": 73.42200779036827}, "speed": {"eval_elapsed_time": 21.946189539972693, "cuda_eval_elapsed_time": 14.804741985321046}, "opt_eval_metrics": {"exact_match": 79.4228949858089, "f1": 87.22907143184382}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 22.954096999950707, "cuda_eval_elapsed_time": 15.809154163360596}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 23.06451029283926, "cuda_eval_elapsed_time": 15.812982402801515}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": {"stats": {"total": 93739586, "nnz": 40333447, "linear_total": 69795840, "linear_nnz": 16424448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 967680, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1085952, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1586688, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2013696, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1872384, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1416192, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1517568, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1645056, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1534464, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1056768, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1041408, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 686592, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 56.972876965767696, "linear_sparsity": 76.46786971830986}, "speed": {"eval_elapsed_time": 21.412942298222333, "cuda_eval_elapsed_time": 14.317796279907228}, "opt_eval_metrics": {"exact_match": 78.82686849574267, "f1": 86.75497848244157}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": {"stats": {"total": 97281986, "nnz": 45486623, "linear_total": 73334784, "linear_nnz": 21573120, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1477632, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1466880, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2388480, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2230272, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2671104, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2241024, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2088960, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1760256, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 1973760, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1271808, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1253376, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 749568, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 53.24250164876363, "linear_sparsity": 70.58269101876675}, "speed": {"eval_elapsed_time": 24.223033807240427, "cuda_eval_elapsed_time": 17.122725742340087}, "opt_eval_metrics": {"exact_match": 80.23651844843897, "f1": 87.68464122182475}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 25.099786044564098, "cuda_eval_elapsed_time": 17.826530269622804}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 24.550941672176123, "cuda_eval_elapsed_time": 17.18915576171875}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 23.06254121195525, "cuda_eval_elapsed_time": 15.659411109924317}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 22.749651389196515, "cuda_eval_elapsed_time": 15.405996612548828}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 24.459769875742495, "cuda_eval_elapsed_time": 17.10724199295044}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 22.70655928598717, "cuda_eval_elapsed_time": 15.385544715881348}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 22.61387904593721, "cuda_eval_elapsed_time": 15.282898136138916}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 22.325429996009916, "cuda_eval_elapsed_time": 14.97483941268921}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 26.68251951597631, "cuda_eval_elapsed_time": 19.294823177337648}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 35.086605437099934, "cuda_eval_elapsed_time": 27.59815271759033}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 19.33693442400545, "cuda_eval_elapsed_time": 12.028588153839111}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 40.4544270709157, "cuda_eval_elapsed_time": 32.900185974121094}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 40.57840260397643, "cuda_eval_elapsed_time": 33.03552900695801}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 37.27218507230282, "cuda_eval_elapsed_time": 29.767933349609375}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 34.84687200607732, "cuda_eval_elapsed_time": 27.35026025390625}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 34.81455515883863, "cuda_eval_elapsed_time": 27.30190062713623}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 34.80252566374838, "cuda_eval_elapsed_time": 27.296903312683106}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 24.229180959053338, "cuda_eval_elapsed_time": 16.875545894622803}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 24.20139658311382, "cuda_eval_elapsed_time": 16.840975036621096}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 18.199997384101152, "cuda_eval_elapsed_time": 10.914331413269043}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 18.107254439033568, "cuda_eval_elapsed_time": 10.818288433074951}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 21.51417324412614, "cuda_eval_elapsed_time": 14.18737794494629}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 25.703615359961987, "cuda_eval_elapsed_time": 18.344205406188966}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 25.57048795511946, "cuda_eval_elapsed_time": 18.227574043273925}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 47.84682997409254, "cuda_eval_elapsed_time": 40.35024221801758}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 47.34199761413038, "cuda_eval_elapsed_time": 39.7485433807373}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 32.721989285200834, "cuda_eval_elapsed_time": 25.26232120513916}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 32.15040939580649, "cuda_eval_elapsed_time": 24.665162628173828}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 31.509735165163875, "cuda_eval_elapsed_time": 24.0460672454834}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 28.92266427911818, "cuda_eval_elapsed_time": 21.433052574157717}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 28.59047580976039, "cuda_eval_elapsed_time": 21.152217895507814}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 28.19032474886626, "cuda_eval_elapsed_time": 20.62959659576416}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 23.045843845698982, "cuda_eval_elapsed_time": 15.740128681182862}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 22.898354202043265, "cuda_eval_elapsed_time": 15.578330577850343}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 46.97493433812633, "cuda_eval_elapsed_time": 39.4461767578125}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 25.196182542014867, "cuda_eval_elapsed_time": 17.847160907745362}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 25.041145149618387, "cuda_eval_elapsed_time": 17.693899471282958}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 24.600313658826053, "cuda_eval_elapsed_time": 17.256864818573}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 24.630300242919475, "cuda_eval_elapsed_time": 17.290757038116457}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 32.1213519689627, "cuda_eval_elapsed_time": 24.584835762023925}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 81807426, "linear_total": 84934656, "linear_nnz": 57862144, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5237760, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 4316160}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5140480, "linear_attention_total": 2359296, "linear_attention_nnz": 829440, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5641216, "linear_attention_total": 2359296, "linear_attention_nnz": 1221632, "linear_dense_total": 4718592, "linear_dense_nnz": 4419584}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5816320, "linear_attention_total": 2359296, "linear_attention_nnz": 1386496, "linear_dense_total": 4718592, "linear_dense_nnz": 4429824}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5997568, "linear_attention_total": 2359296, "linear_attention_nnz": 1540096, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5968896, "linear_attention_total": 2359296, "linear_attention_nnz": 1548288, "linear_dense_total": 4718592, "linear_dense_nnz": 4420608}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5685248, "linear_attention_total": 2359296, "linear_attention_nnz": 1364992, "linear_dense_total": 4718592, "linear_dense_nnz": 4320256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5458944, "linear_attention_total": 2359296, "linear_attention_nnz": 1272832, "linear_dense_total": 4718592, "linear_dense_nnz": 4186112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 1173504, "linear_dense_total": 4718592, "linear_dense_nnz": 3787776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3566592, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 2839552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 671744, "linear_dense_total": 4718592, "linear_dense_nnz": 2001920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 409600, "linear_dense_total": 4718592, "linear_dense_nnz": 1304576}}, "total_sparsity": 24.873695953757846, "linear_sparsity": 31.87451774691358}, "speed": {"eval_elapsed_time": 39.364574735984206, "cuda_eval_elapsed_time": 31.781292793273927}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.73698799207777}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 81295202, "linear_total": 84934656, "linear_nnz": 57351168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5262336, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5235712, "linear_attention_total": 2359296, "linear_attention_nnz": 771072, "linear_dense_total": 4718592, "linear_dense_nnz": 4464640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5669888, "linear_attention_total": 2359296, "linear_attention_nnz": 1152000, "linear_dense_total": 4718592, "linear_dense_nnz": 4517888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5875712, "linear_attention_total": 2359296, "linear_attention_nnz": 1312768, "linear_dense_total": 4718592, "linear_dense_nnz": 4562944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6056960, "linear_attention_total": 2359296, "linear_attention_nnz": 1501184, "linear_dense_total": 4718592, "linear_dense_nnz": 4555776}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5898240, "linear_attention_total": 2359296, "linear_attention_nnz": 1377280, "linear_dense_total": 4718592, "linear_dense_nnz": 4520960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5831680, "linear_attention_total": 2359296, "linear_attention_nnz": 1357824, "linear_dense_total": 4718592, "linear_dense_nnz": 4473856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5483520, "linear_attention_total": 2359296, "linear_attention_nnz": 1192960, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4958208, "linear_attention_total": 2359296, "linear_attention_nnz": 1069056, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3236864, "linear_attention_total": 2359296, "linear_attention_nnz": 718848, "linear_dense_total": 4718592, "linear_dense_nnz": 2518016}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2222080, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1607680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1619968, "linear_attention_total": 2359296, "linear_attention_nnz": 389120, "linear_dense_total": 4718592, "linear_dense_nnz": 1230848}}, "total_sparsity": 25.344087186502197, "linear_sparsity": 32.47612847222222}, "speed": {"eval_elapsed_time": 38.83884137077257, "cuda_eval_elapsed_time": 31.28699700164795}, "opt_eval_metrics": {"exact_match": 81.51371807000946, "f1": 88.67903677006836}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 24.72419478977099, "cuda_eval_elapsed_time": 17.39827905654907}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 20.012413467280567, "cuda_eval_elapsed_time": 12.710339965820312}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 19.88829416874796, "cuda_eval_elapsed_time": 12.579588932037353}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 18.05568285798654, "cuda_eval_elapsed_time": 10.772465507507324}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 17.86702682590112, "cuda_eval_elapsed_time": 10.594800506591797}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 17.854051379021257, "cuda_eval_elapsed_time": 10.574438259124756}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 17.833505778107792, "cuda_eval_elapsed_time": 10.569285308837891}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 21.356581350788474, "cuda_eval_elapsed_time": 14.03688655090332}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 20.885264513082802, "cuda_eval_elapsed_time": 13.536273368835449}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 20.782163904979825, "cuda_eval_elapsed_time": 13.420236305236816}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 20.441790327895433, "cuda_eval_elapsed_time": 13.101784587860108}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 32.84684946574271, "cuda_eval_elapsed_time": 25.38143817138672}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 20.462010452058166, "cuda_eval_elapsed_time": 13.147123012542725}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 20.03693932434544, "cuda_eval_elapsed_time": 12.724558185577393}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 19.969059734605253, "cuda_eval_elapsed_time": 12.67270662689209}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 20.011237109079957, "cuda_eval_elapsed_time": 12.665436817169189}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 19.805885411333293, "cuda_eval_elapsed_time": 12.501174209594726}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 25.591839706059545, "cuda_eval_elapsed_time": 18.203727100372316}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 25.33708621514961, "cuda_eval_elapsed_time": 17.965915870666503}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 25.03162538493052, "cuda_eval_elapsed_time": 17.661002613067627}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 24.923150293063372, "cuda_eval_elapsed_time": 17.547844924926757}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 29.29546993318945, "cuda_eval_elapsed_time": 21.865024238586425}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 20.96597335813567, "cuda_eval_elapsed_time": 13.629536018371581}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 27.047370922286063, "cuda_eval_elapsed_time": 19.637727821350097}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 26.582174220122397, "cuda_eval_elapsed_time": 19.21487816619873}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 33.15431842627004, "cuda_eval_elapsed_time": 25.63935887145996}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 32.20074478490278, "cuda_eval_elapsed_time": 24.727313552856447}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 32.14651732798666, "cuda_eval_elapsed_time": 24.66180950164795}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 21.587445000186563, "cuda_eval_elapsed_time": 14.26605199432373}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 21.546934511046857, "cuda_eval_elapsed_time": 14.242158630371094}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 34.00236483197659, "cuda_eval_elapsed_time": 26.484780250549317}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 42.85027553932741, "cuda_eval_elapsed_time": 35.27284997558594}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 42.81137041794136, "cuda_eval_elapsed_time": 35.28436618041992}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 45.09550473978743, "cuda_eval_elapsed_time": 37.59016569519043}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 18.811811614781618, "cuda_eval_elapsed_time": 11.526592617034913}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 18.890288611873984, "cuda_eval_elapsed_time": 11.48921844482422}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 18.660761894192547, "cuda_eval_elapsed_time": 11.365778179168702}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 20.89022240927443, "cuda_eval_elapsed_time": 13.557396781921387}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 21.888684407807887, "cuda_eval_elapsed_time": 14.414027736663819}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 21.027485753875226, "cuda_eval_elapsed_time": 13.685213722229005}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 20.988653406966478, "cuda_eval_elapsed_time": 13.657840488433838}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 20.778783986810595, "cuda_eval_elapsed_time": 13.44245692062378}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": {"stats": {"total": 334094338, "nnz": 68649433, "linear_total": 301989888, "linear_nnz": 36684800, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1167360, "linear_attention_total": 4194304, "linear_attention_nnz": 974848, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 576512, "linear_attention_total": 4194304, "linear_attention_nnz": 306176, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 584704, "linear_attention_total": 4194304, "linear_attention_nnz": 297984, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1248256, "linear_attention_total": 4194304, "linear_attention_nnz": 834560, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 848896, "linear_attention_total": 4194304, "linear_attention_nnz": 381952, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 959488, "linear_attention_total": 4194304, "linear_attention_nnz": 406528, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1130496, "linear_attention_total": 4194304, "linear_attention_nnz": 522240, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 771072, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 414720, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1839104, "linear_attention_total": 4194304, "linear_attention_nnz": 1091584, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2709504, "linear_attention_total": 4194304, "linear_attention_nnz": 1714176, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2908160, "linear_attention_total": 4194304, "linear_attention_nnz": 1875968, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3074048, "linear_attention_total": 4194304, "linear_attention_nnz": 1832960, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3335168, "linear_attention_total": 4194304, "linear_attention_nnz": 2155520, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2851840, "linear_attention_total": 4194304, "linear_attention_nnz": 1942528, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2761728, "linear_attention_total": 4194304, "linear_attention_nnz": 2079744, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2316288, "linear_attention_total": 4194304, "linear_attention_nnz": 1843200, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1950720, "linear_attention_total": 4194304, "linear_attention_nnz": 1582080, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1757184, "linear_attention_total": 4194304, "linear_attention_nnz": 1435648, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 988160, "linear_attention_total": 4194304, "linear_attention_nnz": 717824, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 447488, "linear_attention_total": 4194304, "linear_attention_nnz": 334848, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 436224, "linear_attention_total": 4194304, "linear_attention_nnz": 358400, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 214016, "linear_attention_total": 4194304, "linear_attention_nnz": 134144, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 293888, "linear_attention_total": 4194304, "linear_attention_nnz": 111616, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.45208128609471, "linear_sparsity": 87.85230848524306}, "speed": {"eval_elapsed_time": 45.056276460178196, "cuda_eval_elapsed_time": 37.4033356552124}, "opt_eval_metrics": {"exact_match": 82.33680227057711, "f1": 89.04761607630476}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": {"stats": {"total": 334094338, "nnz": 68429014, "linear_total": 301989888, "linear_nnz": 36464640, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1181696, "linear_attention_total": 4194304, "linear_attention_nnz": 989184, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 593920, "linear_attention_total": 4194304, "linear_attention_nnz": 323584, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 573440, "linear_attention_total": 4194304, "linear_attention_nnz": 286720, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1215488, "linear_attention_total": 4194304, "linear_attention_nnz": 801792, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 863232, "linear_attention_total": 4194304, "linear_attention_nnz": 396288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 405504, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1124352, "linear_attention_total": 4194304, "linear_attention_nnz": 520192, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1203200, "linear_attention_total": 4194304, "linear_attention_nnz": 764928, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1083392, "linear_attention_total": 4194304, "linear_attention_nnz": 423936, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1817600, "linear_attention_total": 4194304, "linear_attention_nnz": 1070080, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2741248, "linear_attention_total": 4194304, "linear_attention_nnz": 1745920, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2934784, "linear_attention_total": 4194304, "linear_attention_nnz": 1902592, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3023872, "linear_attention_total": 4194304, "linear_attention_nnz": 1782784, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3326976, "linear_attention_total": 4194304, "linear_attention_nnz": 2147328, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2827264, "linear_attention_total": 4194304, "linear_attention_nnz": 1917952, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2731008, "linear_attention_total": 4194304, "linear_attention_nnz": 2049024, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2293760, "linear_attention_total": 4194304, "linear_attention_nnz": 1820672, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1931264, "linear_attention_total": 4194304, "linear_attention_nnz": 1562624, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1712128, "linear_attention_total": 4194304, "linear_attention_nnz": 1390592, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.51805636406804, "linear_sparsity": 87.92521158854166}, "speed": {"eval_elapsed_time": 45.05785069381818, "cuda_eval_elapsed_time": 37.49460416412354}, "opt_eval_metrics": {"exact_match": 82.13812677388836, "f1": 89.03656646065757}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": {"stats": {"total": 334094338, "nnz": 68456822, "linear_total": 301989888, "linear_nnz": 36492288, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1171456, "linear_attention_total": 4194304, "linear_attention_nnz": 978944, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 589824, "linear_attention_total": 4194304, "linear_attention_nnz": 319488, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 580608, "linear_attention_total": 4194304, "linear_attention_nnz": 293888, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 795648, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 862208, "linear_attention_total": 4194304, "linear_attention_nnz": 395264, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 945152, "linear_attention_total": 4194304, "linear_attention_nnz": 392192, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1127424, "linear_attention_total": 4194304, "linear_attention_nnz": 523264, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1222656, "linear_attention_total": 4194304, "linear_attention_nnz": 784384, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 416768, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1816576, "linear_attention_total": 4194304, "linear_attention_nnz": 1069056, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2749440, "linear_attention_total": 4194304, "linear_attention_nnz": 1754112, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2954240, "linear_attention_total": 4194304, "linear_attention_nnz": 1922048, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3016704, "linear_attention_total": 4194304, "linear_attention_nnz": 1775616, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3329024, "linear_attention_total": 4194304, "linear_attention_nnz": 2149376, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2864128, "linear_attention_total": 4194304, "linear_attention_nnz": 1954816, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2747392, "linear_attention_total": 4194304, "linear_attention_nnz": 2065408, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2296832, "linear_attention_total": 4194304, "linear_attention_nnz": 1823744, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1927168, "linear_attention_total": 4194304, "linear_attention_nnz": 1558528, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1678336, "linear_attention_total": 4194304, "linear_attention_nnz": 1356800, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.50973296650122, "linear_sparsity": 87.91605631510416}, "speed": {"eval_elapsed_time": 45.08761470299214, "cuda_eval_elapsed_time": 37.51742427825928}, "opt_eval_metrics": {"exact_match": 82.30842005676443, "f1": 89.04987146464723}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}}, "base_speed_report": {"eval_elapsed_time": 33.63453570473939, "cuda_eval_elapsed_time": 26.66134809112549}} \ No newline at end of file diff --git a/analysis/files/results/results15.json b/analysis/files/results/results15.json deleted file mode 100644 index 420c610e..00000000 --- a/analysis/files/results/results15.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 20.51507593272254, "cuda_eval_elapsed_time": 13.414341842651368}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.5463269171305, "cuda_eval_elapsed_time": 19.462684043884277}, "opt_eval_metrics": {"exact_match": 80.86092715231788, "f1": 88.26868699204444}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.70568129932508, "cuda_eval_elapsed_time": 19.503536235809328}, "opt_eval_metrics": {"exact_match": 80.87038789025544, "f1": 88.24613086360249}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-20000": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.695561807602644, "cuda_eval_elapsed_time": 37.43465453338623}, "opt_eval_metrics": {"exact_match": 81.78807947019868, "f1": 88.89084139605751}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_v0/checkpoint-22132": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.85223976522684, "cuda_eval_elapsed_time": 37.58739047241211}, "opt_eval_metrics": {"exact_match": 81.67455061494796, "f1": 88.83202816693091}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.723125794902444, "cuda_eval_elapsed_time": 18.497972122192383}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.615046767052263, "cuda_eval_elapsed_time": 18.414370426177978}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.64635610114783, "cuda_eval_elapsed_time": 18.39916780090332}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": {"stats": {"total": 96101186, "nnz": 47671853, "linear_total": 72155136, "linear_nnz": 23757312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1420800, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1703424, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2786304, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2649600, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3124224, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2449920, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2388480, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2006016, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1910784, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1122816, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1291776, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 903168, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 50.394105437991165, "linear_sparsity": 67.07467643051771}, "speed": {"eval_elapsed_time": 24.566468104720116, "cuda_eval_elapsed_time": 17.369024868011476}, "opt_eval_metrics": {"exact_match": 80.69063386944181, "f1": 88.06386432532665}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-20000": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.92327093007043, "cuda_eval_elapsed_time": 37.62901539611816}, "opt_eval_metrics": {"exact_match": 81.83538315988648, "f1": 88.91108458489386}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-22132": {"stats": {"total": 274806402, "nnz": 85945974, "linear_total": 242745344, "linear_nnz": 53977088, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1390592, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1708032, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72490110328653, "linear_sparsity": 77.763903887689}, "speed": {"eval_elapsed_time": 44.89120363816619, "cuda_eval_elapsed_time": 37.62109769439697}, "opt_eval_metrics": {"exact_match": 81.64616840113528, "f1": 88.81401461448195}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": {"stats": {"total": 99446786, "nnz": 54738530, "linear_total": 75497472, "linear_nnz": 30818304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2502144, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2268672, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3293184, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3325440, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3780096, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3480576, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2904576, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2420736, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2440704, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1388544, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1545216, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1468416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 44.95696422004025, "linear_sparsity": 59.1796875}, "speed": {"eval_elapsed_time": 28.055892461910844, "cuda_eval_elapsed_time": 20.844706882476807}, "opt_eval_metrics": {"exact_match": 81.69347209082308, "f1": 88.72194531479171}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": {"stats": {"total": 93345986, "nnz": 42356011, "linear_total": 69402624, "linear_nnz": 18445824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1198080, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1379328, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1878528, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2090496, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2210304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1726464, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1747968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1826304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1443840, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1084416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1070592, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 789504, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 54.62471091151151, "linear_sparsity": 73.42200779036827}, "speed": {"eval_elapsed_time": 21.946189539972693, "cuda_eval_elapsed_time": 14.804741985321046}, "opt_eval_metrics": {"exact_match": 79.4228949858089, "f1": 87.22907143184382}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 22.954096999950707, "cuda_eval_elapsed_time": 15.809154163360596}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 23.06451029283926, "cuda_eval_elapsed_time": 15.812982402801515}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": {"stats": {"total": 93739586, "nnz": 40333447, "linear_total": 69795840, "linear_nnz": 16424448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 967680, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1085952, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1586688, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2013696, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1872384, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1416192, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1517568, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1645056, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1534464, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1056768, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1041408, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 686592, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 56.972876965767696, "linear_sparsity": 76.46786971830986}, "speed": {"eval_elapsed_time": 21.412942298222333, "cuda_eval_elapsed_time": 14.317796279907228}, "opt_eval_metrics": {"exact_match": 78.82686849574267, "f1": 86.75497848244157}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": {"stats": {"total": 97281986, "nnz": 45486623, "linear_total": 73334784, "linear_nnz": 21573120, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1477632, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1466880, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2388480, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2230272, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2671104, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2241024, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2088960, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1760256, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 1973760, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1271808, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1253376, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 749568, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 53.24250164876363, "linear_sparsity": 70.58269101876675}, "speed": {"eval_elapsed_time": 24.223033807240427, "cuda_eval_elapsed_time": 17.122725742340087}, "opt_eval_metrics": {"exact_match": 80.23651844843897, "f1": 87.68464122182475}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 25.099786044564098, "cuda_eval_elapsed_time": 17.826530269622804}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 24.550941672176123, "cuda_eval_elapsed_time": 17.18915576171875}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 23.06254121195525, "cuda_eval_elapsed_time": 15.659411109924317}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 22.749651389196515, "cuda_eval_elapsed_time": 15.405996612548828}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 24.459769875742495, "cuda_eval_elapsed_time": 17.10724199295044}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 22.70655928598717, "cuda_eval_elapsed_time": 15.385544715881348}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 22.61387904593721, "cuda_eval_elapsed_time": 15.282898136138916}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 22.325429996009916, "cuda_eval_elapsed_time": 14.97483941268921}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 26.68251951597631, "cuda_eval_elapsed_time": 19.294823177337648}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 35.086605437099934, "cuda_eval_elapsed_time": 27.59815271759033}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 19.33693442400545, "cuda_eval_elapsed_time": 12.028588153839111}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 40.4544270709157, "cuda_eval_elapsed_time": 32.900185974121094}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 40.57840260397643, "cuda_eval_elapsed_time": 33.03552900695801}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 37.27218507230282, "cuda_eval_elapsed_time": 29.767933349609375}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 34.84687200607732, "cuda_eval_elapsed_time": 27.35026025390625}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 34.81455515883863, "cuda_eval_elapsed_time": 27.30190062713623}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 34.80252566374838, "cuda_eval_elapsed_time": 27.296903312683106}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 24.229180959053338, "cuda_eval_elapsed_time": 16.875545894622803}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 24.20139658311382, "cuda_eval_elapsed_time": 16.840975036621096}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 18.199997384101152, "cuda_eval_elapsed_time": 10.914331413269043}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 18.107254439033568, "cuda_eval_elapsed_time": 10.818288433074951}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 21.51417324412614, "cuda_eval_elapsed_time": 14.18737794494629}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 25.703615359961987, "cuda_eval_elapsed_time": 18.344205406188966}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 25.57048795511946, "cuda_eval_elapsed_time": 18.227574043273925}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 47.84682997409254, "cuda_eval_elapsed_time": 40.35024221801758}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 47.34199761413038, "cuda_eval_elapsed_time": 39.7485433807373}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 32.721989285200834, "cuda_eval_elapsed_time": 25.26232120513916}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 32.15040939580649, "cuda_eval_elapsed_time": 24.665162628173828}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 31.509735165163875, "cuda_eval_elapsed_time": 24.0460672454834}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 28.92266427911818, "cuda_eval_elapsed_time": 21.433052574157717}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 28.59047580976039, "cuda_eval_elapsed_time": 21.152217895507814}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 28.19032474886626, "cuda_eval_elapsed_time": 20.62959659576416}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 23.045843845698982, "cuda_eval_elapsed_time": 15.740128681182862}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 22.898354202043265, "cuda_eval_elapsed_time": 15.578330577850343}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 46.97493433812633, "cuda_eval_elapsed_time": 39.4461767578125}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 25.196182542014867, "cuda_eval_elapsed_time": 17.847160907745362}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 25.041145149618387, "cuda_eval_elapsed_time": 17.693899471282958}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 24.600313658826053, "cuda_eval_elapsed_time": 17.256864818573}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 24.630300242919475, "cuda_eval_elapsed_time": 17.290757038116457}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 32.1213519689627, "cuda_eval_elapsed_time": 24.584835762023925}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 81807426, "linear_total": 84934656, "linear_nnz": 57862144, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5237760, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 4316160}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5140480, "linear_attention_total": 2359296, "linear_attention_nnz": 829440, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5641216, "linear_attention_total": 2359296, "linear_attention_nnz": 1221632, "linear_dense_total": 4718592, "linear_dense_nnz": 4419584}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5816320, "linear_attention_total": 2359296, "linear_attention_nnz": 1386496, "linear_dense_total": 4718592, "linear_dense_nnz": 4429824}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5997568, "linear_attention_total": 2359296, "linear_attention_nnz": 1540096, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5968896, "linear_attention_total": 2359296, "linear_attention_nnz": 1548288, "linear_dense_total": 4718592, "linear_dense_nnz": 4420608}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5685248, "linear_attention_total": 2359296, "linear_attention_nnz": 1364992, "linear_dense_total": 4718592, "linear_dense_nnz": 4320256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5458944, "linear_attention_total": 2359296, "linear_attention_nnz": 1272832, "linear_dense_total": 4718592, "linear_dense_nnz": 4186112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 1173504, "linear_dense_total": 4718592, "linear_dense_nnz": 3787776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3566592, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 2839552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 671744, "linear_dense_total": 4718592, "linear_dense_nnz": 2001920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 409600, "linear_dense_total": 4718592, "linear_dense_nnz": 1304576}}, "total_sparsity": 24.873695953757846, "linear_sparsity": 31.87451774691358}, "speed": {"eval_elapsed_time": 39.364574735984206, "cuda_eval_elapsed_time": 31.781292793273927}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.73698799207777}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 81295202, "linear_total": 84934656, "linear_nnz": 57351168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5262336, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5235712, "linear_attention_total": 2359296, "linear_attention_nnz": 771072, "linear_dense_total": 4718592, "linear_dense_nnz": 4464640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5669888, "linear_attention_total": 2359296, "linear_attention_nnz": 1152000, "linear_dense_total": 4718592, "linear_dense_nnz": 4517888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5875712, "linear_attention_total": 2359296, "linear_attention_nnz": 1312768, "linear_dense_total": 4718592, "linear_dense_nnz": 4562944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6056960, "linear_attention_total": 2359296, "linear_attention_nnz": 1501184, "linear_dense_total": 4718592, "linear_dense_nnz": 4555776}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5898240, "linear_attention_total": 2359296, "linear_attention_nnz": 1377280, "linear_dense_total": 4718592, "linear_dense_nnz": 4520960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5831680, "linear_attention_total": 2359296, "linear_attention_nnz": 1357824, "linear_dense_total": 4718592, "linear_dense_nnz": 4473856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5483520, "linear_attention_total": 2359296, "linear_attention_nnz": 1192960, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4958208, "linear_attention_total": 2359296, "linear_attention_nnz": 1069056, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3236864, "linear_attention_total": 2359296, "linear_attention_nnz": 718848, "linear_dense_total": 4718592, "linear_dense_nnz": 2518016}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2222080, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1607680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1619968, "linear_attention_total": 2359296, "linear_attention_nnz": 389120, "linear_dense_total": 4718592, "linear_dense_nnz": 1230848}}, "total_sparsity": 25.344087186502197, "linear_sparsity": 32.47612847222222}, "speed": {"eval_elapsed_time": 38.83884137077257, "cuda_eval_elapsed_time": 31.28699700164795}, "opt_eval_metrics": {"exact_match": 81.51371807000946, "f1": 88.67903677006836}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 24.72419478977099, "cuda_eval_elapsed_time": 17.39827905654907}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 20.012413467280567, "cuda_eval_elapsed_time": 12.710339965820312}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 19.88829416874796, "cuda_eval_elapsed_time": 12.579588932037353}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 18.05568285798654, "cuda_eval_elapsed_time": 10.772465507507324}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 17.86702682590112, "cuda_eval_elapsed_time": 10.594800506591797}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 17.854051379021257, "cuda_eval_elapsed_time": 10.574438259124756}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 17.833505778107792, "cuda_eval_elapsed_time": 10.569285308837891}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 21.356581350788474, "cuda_eval_elapsed_time": 14.03688655090332}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 20.885264513082802, "cuda_eval_elapsed_time": 13.536273368835449}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 20.782163904979825, "cuda_eval_elapsed_time": 13.420236305236816}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 20.441790327895433, "cuda_eval_elapsed_time": 13.101784587860108}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 32.84684946574271, "cuda_eval_elapsed_time": 25.38143817138672}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 20.462010452058166, "cuda_eval_elapsed_time": 13.147123012542725}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 20.03693932434544, "cuda_eval_elapsed_time": 12.724558185577393}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 19.969059734605253, "cuda_eval_elapsed_time": 12.67270662689209}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 20.011237109079957, "cuda_eval_elapsed_time": 12.665436817169189}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 19.805885411333293, "cuda_eval_elapsed_time": 12.501174209594726}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 25.591839706059545, "cuda_eval_elapsed_time": 18.203727100372316}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 25.33708621514961, "cuda_eval_elapsed_time": 17.965915870666503}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 25.03162538493052, "cuda_eval_elapsed_time": 17.661002613067627}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 24.923150293063372, "cuda_eval_elapsed_time": 17.547844924926757}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 29.29546993318945, "cuda_eval_elapsed_time": 21.865024238586425}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 20.96597335813567, "cuda_eval_elapsed_time": 13.629536018371581}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 27.047370922286063, "cuda_eval_elapsed_time": 19.637727821350097}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 26.582174220122397, "cuda_eval_elapsed_time": 19.21487816619873}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 33.15431842627004, "cuda_eval_elapsed_time": 25.63935887145996}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 32.20074478490278, "cuda_eval_elapsed_time": 24.727313552856447}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 32.14651732798666, "cuda_eval_elapsed_time": 24.66180950164795}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 21.587445000186563, "cuda_eval_elapsed_time": 14.26605199432373}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 21.546934511046857, "cuda_eval_elapsed_time": 14.242158630371094}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 34.00236483197659, "cuda_eval_elapsed_time": 26.484780250549317}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 42.85027553932741, "cuda_eval_elapsed_time": 35.27284997558594}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 42.81137041794136, "cuda_eval_elapsed_time": 35.28436618041992}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 45.09550473978743, "cuda_eval_elapsed_time": 37.59016569519043}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 18.811811614781618, "cuda_eval_elapsed_time": 11.526592617034913}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 18.890288611873984, "cuda_eval_elapsed_time": 11.48921844482422}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 18.660761894192547, "cuda_eval_elapsed_time": 11.365778179168702}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 20.89022240927443, "cuda_eval_elapsed_time": 13.557396781921387}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 21.888684407807887, "cuda_eval_elapsed_time": 14.414027736663819}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 21.027485753875226, "cuda_eval_elapsed_time": 13.685213722229005}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 20.988653406966478, "cuda_eval_elapsed_time": 13.657840488433838}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 20.778783986810595, "cuda_eval_elapsed_time": 13.44245692062378}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": {"stats": {"total": 334094338, "nnz": 68649433, "linear_total": 301989888, "linear_nnz": 36684800, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1167360, "linear_attention_total": 4194304, "linear_attention_nnz": 974848, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 576512, "linear_attention_total": 4194304, "linear_attention_nnz": 306176, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 584704, "linear_attention_total": 4194304, "linear_attention_nnz": 297984, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1248256, "linear_attention_total": 4194304, "linear_attention_nnz": 834560, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 848896, "linear_attention_total": 4194304, "linear_attention_nnz": 381952, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 959488, "linear_attention_total": 4194304, "linear_attention_nnz": 406528, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1130496, "linear_attention_total": 4194304, "linear_attention_nnz": 522240, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 771072, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 414720, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1839104, "linear_attention_total": 4194304, "linear_attention_nnz": 1091584, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2709504, "linear_attention_total": 4194304, "linear_attention_nnz": 1714176, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2908160, "linear_attention_total": 4194304, "linear_attention_nnz": 1875968, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3074048, "linear_attention_total": 4194304, "linear_attention_nnz": 1832960, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3335168, "linear_attention_total": 4194304, "linear_attention_nnz": 2155520, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2851840, "linear_attention_total": 4194304, "linear_attention_nnz": 1942528, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2761728, "linear_attention_total": 4194304, "linear_attention_nnz": 2079744, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2316288, "linear_attention_total": 4194304, "linear_attention_nnz": 1843200, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1950720, "linear_attention_total": 4194304, "linear_attention_nnz": 1582080, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1757184, "linear_attention_total": 4194304, "linear_attention_nnz": 1435648, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 988160, "linear_attention_total": 4194304, "linear_attention_nnz": 717824, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 447488, "linear_attention_total": 4194304, "linear_attention_nnz": 334848, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 436224, "linear_attention_total": 4194304, "linear_attention_nnz": 358400, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 214016, "linear_attention_total": 4194304, "linear_attention_nnz": 134144, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 293888, "linear_attention_total": 4194304, "linear_attention_nnz": 111616, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.45208128609471, "linear_sparsity": 87.85230848524306}, "speed": {"eval_elapsed_time": 45.056276460178196, "cuda_eval_elapsed_time": 37.4033356552124}, "opt_eval_metrics": {"exact_match": 82.33680227057711, "f1": 89.04761607630476}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": {"stats": {"total": 334094338, "nnz": 68429014, "linear_total": 301989888, "linear_nnz": 36464640, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1181696, "linear_attention_total": 4194304, "linear_attention_nnz": 989184, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 593920, "linear_attention_total": 4194304, "linear_attention_nnz": 323584, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 573440, "linear_attention_total": 4194304, "linear_attention_nnz": 286720, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1215488, "linear_attention_total": 4194304, "linear_attention_nnz": 801792, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 863232, "linear_attention_total": 4194304, "linear_attention_nnz": 396288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 405504, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1124352, "linear_attention_total": 4194304, "linear_attention_nnz": 520192, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1203200, "linear_attention_total": 4194304, "linear_attention_nnz": 764928, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1083392, "linear_attention_total": 4194304, "linear_attention_nnz": 423936, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1817600, "linear_attention_total": 4194304, "linear_attention_nnz": 1070080, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2741248, "linear_attention_total": 4194304, "linear_attention_nnz": 1745920, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2934784, "linear_attention_total": 4194304, "linear_attention_nnz": 1902592, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3023872, "linear_attention_total": 4194304, "linear_attention_nnz": 1782784, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3326976, "linear_attention_total": 4194304, "linear_attention_nnz": 2147328, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2827264, "linear_attention_total": 4194304, "linear_attention_nnz": 1917952, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2731008, "linear_attention_total": 4194304, "linear_attention_nnz": 2049024, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2293760, "linear_attention_total": 4194304, "linear_attention_nnz": 1820672, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1931264, "linear_attention_total": 4194304, "linear_attention_nnz": 1562624, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1712128, "linear_attention_total": 4194304, "linear_attention_nnz": 1390592, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.51805636406804, "linear_sparsity": 87.92521158854166}, "speed": {"eval_elapsed_time": 45.05785069381818, "cuda_eval_elapsed_time": 37.49460416412354}, "opt_eval_metrics": {"exact_match": 82.13812677388836, "f1": 89.03656646065757}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": {"stats": {"total": 334094338, "nnz": 68456822, "linear_total": 301989888, "linear_nnz": 36492288, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1171456, "linear_attention_total": 4194304, "linear_attention_nnz": 978944, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 589824, "linear_attention_total": 4194304, "linear_attention_nnz": 319488, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 580608, "linear_attention_total": 4194304, "linear_attention_nnz": 293888, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 795648, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 862208, "linear_attention_total": 4194304, "linear_attention_nnz": 395264, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 945152, "linear_attention_total": 4194304, "linear_attention_nnz": 392192, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1127424, "linear_attention_total": 4194304, "linear_attention_nnz": 523264, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1222656, "linear_attention_total": 4194304, "linear_attention_nnz": 784384, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 416768, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1816576, "linear_attention_total": 4194304, "linear_attention_nnz": 1069056, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2749440, "linear_attention_total": 4194304, "linear_attention_nnz": 1754112, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2954240, "linear_attention_total": 4194304, "linear_attention_nnz": 1922048, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3016704, "linear_attention_total": 4194304, "linear_attention_nnz": 1775616, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3329024, "linear_attention_total": 4194304, "linear_attention_nnz": 2149376, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2864128, "linear_attention_total": 4194304, "linear_attention_nnz": 1954816, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2747392, "linear_attention_total": 4194304, "linear_attention_nnz": 2065408, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2296832, "linear_attention_total": 4194304, "linear_attention_nnz": 1823744, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1927168, "linear_attention_total": 4194304, "linear_attention_nnz": 1558528, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1678336, "linear_attention_total": 4194304, "linear_attention_nnz": 1356800, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.50973296650122, "linear_sparsity": 87.91605631510416}, "speed": {"eval_elapsed_time": 45.08761470299214, "cuda_eval_elapsed_time": 37.51742427825928}, "opt_eval_metrics": {"exact_match": 82.30842005676443, "f1": 89.04987146464723}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/home/lagunas/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}}, "base_speed_report": {"eval_elapsed_time": 45.42232701787725, "cuda_eval_elapsed_time": 38.59338353729248}} \ No newline at end of file diff --git a/analysis/files/results/results16.json b/analysis/files/results/results16.json deleted file mode 100644 index 586ac840..00000000 --- a/analysis/files/results/results16.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 90984386, "nnz": 40951962, "linear_total": 67043328, "linear_nnz": 17043456, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1268736, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1296384, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1440768, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2006016, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1709568, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1863168, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1628160, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1901568, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 923136, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1096704, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1104384, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 804864, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 54.990121052199, "linear_sparsity": 74.57844574780059}, "speed": {"eval_elapsed_time": 20.85535095212981, "cuda_eval_elapsed_time": 13.783753513336181}, "opt_eval_metrics": {"exact_match": 78.63765373699148, "f1": 86.69392512957342}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.62503844080493, "cuda_eval_elapsed_time": 19.458871116638186}, "opt_eval_metrics": {"exact_match": 80.86092715231788, "f1": 88.26868699204444}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": {"stats": {"total": 99840386, "nnz": 50390485, "linear_total": 75890688, "linear_nnz": 26472960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1903104, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1735680, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2840064, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2721792, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3208704, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2952192, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2509824, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2131968, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2259456, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1293312, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1671168, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1245696, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 49.52895614806617, "linear_sparsity": 65.11698510362694}, "speed": {"eval_elapsed_time": 26.577815205790102, "cuda_eval_elapsed_time": 19.453059474945068}, "opt_eval_metrics": {"exact_match": 80.87038789025544, "f1": 88.24613086360249}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-22500": {"stats": {"total": 271133698, "nnz": 105691291, "linear_total": 239075328, "linear_nnz": 73713664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2408448, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1800192, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1884160, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2734080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2291712, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 2660352, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2578432, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2822144, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2492416, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 3297280, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 5031936, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 4870144, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 4757504, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 5750784, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4921344, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4321280, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4024320, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4243456, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 3260416, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10223616, "linear_nnz": 2574336, "linear_attention_total": 1835008, "linear_attention_nnz": 1835008, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1406976, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1243136, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 704512, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1634304, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 61.01875503501597, "linear_sparsity": 69.16718064692982}, "speed": {"eval_elapsed_time": 48.981834520120174, "cuda_eval_elapsed_time": 41.6732879486084}, "opt_eval_metrics": {"exact_match": 84.399243140965, "f1": 90.84270784891945}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-25000": {"stats": {"total": 271133698, "nnz": 105691291, "linear_total": 239075328, "linear_nnz": 73713664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2408448, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1800192, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1884160, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2734080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2291712, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 2660352, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2578432, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2822144, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2492416, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 3297280, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 5031936, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 4870144, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 4757504, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 5750784, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4921344, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4321280, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4024320, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4243456, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 3260416, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10223616, "linear_nnz": 2574336, "linear_attention_total": 1835008, "linear_attention_nnz": 1835008, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1406976, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1243136, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 704512, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1634304, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 61.01875503501597, "linear_sparsity": 69.16718064692982}, "speed": {"eval_elapsed_time": 49.06402187002823, "cuda_eval_elapsed_time": 41.50353849792481}, "opt_eval_metrics": {"exact_match": 84.20056764427625, "f1": 90.73941291394593}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-27665": {"stats": {"total": 271133698, "nnz": 105691291, "linear_total": 239075328, "linear_nnz": 73713664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2408448, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1800192, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1884160, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2734080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2291712, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 2660352, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2578432, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2822144, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2492416, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 3297280, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 5031936, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 4870144, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 4757504, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 5750784, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4921344, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4321280, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4024320, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4243456, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 3260416, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10223616, "linear_nnz": 2574336, "linear_attention_total": 1835008, "linear_attention_nnz": 1835008, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1406976, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1243136, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 704512, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1634304, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 61.01875503501597, "linear_sparsity": 69.16718064692982}, "speed": {"eval_elapsed_time": 49.02150737866759, "cuda_eval_elapsed_time": 41.6272840423584}, "opt_eval_metrics": {"exact_match": 84.2100283822138, "f1": 90.70141124860059}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55000": {"stats": {"total": 271133698, "nnz": 105691291, "linear_total": 239075328, "linear_nnz": 73713664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2408448, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1800192, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1884160, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2734080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2291712, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 2660352, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2578432, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2822144, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2492416, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 3297280, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 5031936, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 4870144, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 4757504, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 5750784, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4921344, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4321280, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4024320, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4243456, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 3260416, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10223616, "linear_nnz": 2574336, "linear_attention_total": 1835008, "linear_attention_nnz": 1835008, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1406976, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1243136, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 704512, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1634304, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 61.01875503501597, "linear_sparsity": 69.16718064692982}, "speed": {"eval_elapsed_time": 49.32021534908563, "cuda_eval_elapsed_time": 41.85157574462891}, "opt_eval_metrics": {"exact_match": 84.63576158940397, "f1": 91.0266636723574}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55330": {"stats": {"total": 271133698, "nnz": 105691291, "linear_total": 239075328, "linear_nnz": 73713664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2408448, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1800192, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1884160, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2734080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2291712, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 2660352, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2578432, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 2822144, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 2492416, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 3297280, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 5031936, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 4870144, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 4757504, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 5750784, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4921344, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4321280, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 4024320, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4243456, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 10747904, "linear_nnz": 3260416, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10223616, "linear_nnz": 2574336, "linear_attention_total": 1835008, "linear_attention_nnz": 1835008, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1406976, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1243136, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 704512, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1634304, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 61.01875503501597, "linear_sparsity": 69.16718064692982}, "speed": {"eval_elapsed_time": 49.428419118281454, "cuda_eval_elapsed_time": 41.85431317138672}, "opt_eval_metrics": {"exact_match": 84.65468306527909, "f1": 91.01004624462917}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.7331585730426, "cuda_eval_elapsed_time": 18.439563426971436}, "opt_eval_metrics": {"exact_match": 80.80416272469253, "f1": 88.20260662536118}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 26.023085076361895, "cuda_eval_elapsed_time": 18.875869693756105}, "opt_eval_metrics": {"exact_match": 80.6717123935667, "f1": 88.128983727943}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 95510786, "nnz": 52448657, "linear_total": 71565312, "linear_nnz": 28531200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2125824, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 2357760, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3317760, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 3334656, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3495936, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2809344, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2646528, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2363904, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2119680, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1428480, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1367040, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 1164288, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 45.08614241746477, "linear_sparsity": 60.1326407967033}, "speed": {"eval_elapsed_time": 25.61402732366696, "cuda_eval_elapsed_time": 18.42703369522095}, "opt_eval_metrics": {"exact_match": 80.68117313150425, "f1": 88.11014400914335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": {"stats": {"total": 96101186, "nnz": 47671853, "linear_total": 72155136, "linear_nnz": 23757312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1420800, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1703424, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2786304, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2649600, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3124224, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2449920, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2388480, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2006016, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 1910784, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1122816, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1291776, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 903168, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 50.394105437991165, "linear_sparsity": 67.07467643051771}, "speed": {"eval_elapsed_time": 24.534384376835078, "cuda_eval_elapsed_time": 17.390718185424806}, "opt_eval_metrics": {"exact_match": 80.69063386944181, "f1": 88.06386432532665}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-47500": {"stats": {"total": 274806402, "nnz": 85952121, "linear_total": 242745344, "linear_nnz": 53983232, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1394688, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1710080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72266425583491, "linear_sparsity": 77.76137284017278}, "speed": {"eval_elapsed_time": 44.58338421070948, "cuda_eval_elapsed_time": 37.53850735473633}, "opt_eval_metrics": {"exact_match": 83.74645222327341, "f1": 90.16320537561052}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-55330": {"stats": {"total": 274806402, "nnz": 85952121, "linear_total": 242745344, "linear_nnz": 53983232, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 1765376, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 794624, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 811008, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1724416, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 991232, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 8912896, "linear_nnz": 1077248, "linear_attention_total": 524288, "linear_attention_nnz": 524288, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 1394688, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1748992, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1710080, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 9961472, "linear_nnz": 2320384, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3616768, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3653632, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4386816, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 11534336, "linear_nnz": 4325376, "linear_attention_total": 3145728, "linear_attention_nnz": 3145728, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3792896, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 11010048, "linear_nnz": 3303424, "linear_attention_total": 2621440, "linear_attention_nnz": 2621440, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3356672, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3252224, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 11272192, "linear_nnz": 3205120, "linear_attention_total": 2883584, "linear_attention_nnz": 2883584, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 10485760, "linear_nnz": 2367488, "linear_attention_total": 2097152, "linear_attention_nnz": 2097152, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 899072, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 9699328, "linear_nnz": 1388544, "linear_attention_total": 1310720, "linear_attention_nnz": 1310720, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 9175040, "linear_nnz": 866304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 9437184, "linear_nnz": 1230848, "linear_attention_total": 1048576, "linear_attention_nnz": 1048576, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 68.72266425583491, "linear_sparsity": 77.76137284017278}, "speed": {"eval_elapsed_time": 44.469506811816245, "cuda_eval_elapsed_time": 37.30008307647705}, "opt_eval_metrics": {"exact_match": 83.62346263008514, "f1": 90.10843526218638}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": {"stats": {"total": 99446786, "nnz": 54738530, "linear_total": 75497472, "linear_nnz": 30818304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2502144, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2268672, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3293184, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 3325440, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 6881280, "linear_nnz": 3780096, "linear_attention_total": 2162688, "linear_attention_nnz": 2162688, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 3480576, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2904576, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2420736, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2440704, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1388544, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1545216, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1468416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 44.95696422004025, "linear_sparsity": 59.1796875}, "speed": {"eval_elapsed_time": 28.213609586004168, "cuda_eval_elapsed_time": 20.951393741607667}, "opt_eval_metrics": {"exact_match": 81.69347209082308, "f1": 88.72194531479171}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": {"stats": {"total": 93345986, "nnz": 42356011, "linear_total": 69402624, "linear_nnz": 18445824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1198080, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1379328, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1878528, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2090496, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2210304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1726464, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1747968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1826304, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1443840, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1084416, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1070592, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 789504, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 54.62471091151151, "linear_sparsity": 73.42200779036827}, "speed": {"eval_elapsed_time": 22.048566517885774, "cuda_eval_elapsed_time": 14.848762104034424}, "opt_eval_metrics": {"exact_match": 79.4228949858089, "f1": 87.22907143184382}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 23.001069764140993, "cuda_eval_elapsed_time": 15.845825397491456}, "opt_eval_metrics": {"exact_match": 80.01892147587512, "f1": 87.70568682399205}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": {"stats": {"total": 93149186, "nnz": 46161559, "linear_total": 69206016, "linear_nnz": 22248960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1634304, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1887744, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2400768, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2588160, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2655744, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 2199552, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 2131968, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 2092032, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1417728, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1155072, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1130496, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 955392, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 50.443411282198426, "linear_sparsity": 67.85111860795455}, "speed": {"eval_elapsed_time": 22.999519595876336, "cuda_eval_elapsed_time": 15.838374267578125}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.70940223967354}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": {"stats": {"total": 93739586, "nnz": 40333447, "linear_total": 69795840, "linear_nnz": 16424448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 967680, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 5505024, "linear_nnz": 1085952, "linear_attention_total": 786432, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1586688, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2013696, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1872384, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1416192, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1517568, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1645056, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1534464, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1056768, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1041408, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 686592, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 56.972876965767696, "linear_sparsity": 76.46786971830986}, "speed": {"eval_elapsed_time": 21.489493974950165, "cuda_eval_elapsed_time": 14.354346725463868}, "opt_eval_metrics": {"exact_match": 78.82686849574267, "f1": 86.75497848244157}}, "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": {"stats": {"total": 97281986, "nnz": 45486623, "linear_total": 73334784, "linear_nnz": 21573120, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1477632, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 5701632, "linear_nnz": 1466880, "linear_attention_total": 983040, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 2388480, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2230272, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 6684672, "linear_nnz": 2671104, "linear_attention_total": 1966080, "linear_attention_nnz": 1966080, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2241024, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 6291456, "linear_nnz": 2088960, "linear_attention_total": 1572864, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 6094848, "linear_nnz": 1760256, "linear_attention_total": 1376256, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 6488064, "linear_nnz": 1973760, "linear_attention_total": 1769472, "linear_attention_nnz": 1769472, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1271808, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 5898240, "linear_nnz": 1253376, "linear_attention_total": 1179648, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 5308416, "linear_nnz": 749568, "linear_attention_total": 589824, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 53.24250164876363, "linear_sparsity": 70.58269101876675}, "speed": {"eval_elapsed_time": 24.304617804009467, "cuda_eval_elapsed_time": 17.154361824035647}, "opt_eval_metrics": {"exact_match": 80.23651844843897, "f1": 87.68464122182475}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53991210, "linear_total": 84934656, "linear_nnz": 30052224, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2737920, "linear_attention_total": 2359296, "linear_attention_nnz": 634048, "linear_dense_total": 4718592, "linear_dense_nnz": 2103872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2982272, "linear_attention_total": 2359296, "linear_attention_nnz": 662208, "linear_dense_total": 4718592, "linear_dense_nnz": 2320064}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3613632, "linear_attention_total": 2359296, "linear_attention_nnz": 975744, "linear_dense_total": 4718592, "linear_dense_nnz": 2637888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788800, "linear_attention_total": 2359296, "linear_attention_nnz": 1107392, "linear_dense_total": 4718592, "linear_dense_nnz": 2681408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3873920, "linear_attention_total": 2359296, "linear_attention_nnz": 1248448, "linear_dense_total": 4718592, "linear_dense_nnz": 2625472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3741376, "linear_attention_total": 2359296, "linear_attention_nnz": 1182592, "linear_dense_total": 4718592, "linear_dense_nnz": 2558784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3147520, "linear_attention_total": 2359296, "linear_attention_nnz": 1016896, "linear_dense_total": 4718592, "linear_dense_nnz": 2130624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2439552, "linear_attention_total": 2359296, "linear_attention_nnz": 915648, "linear_dense_total": 4718592, "linear_dense_nnz": 1523904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647744, "linear_attention_total": 2359296, "linear_attention_nnz": 820288, "linear_dense_total": 4718592, "linear_dense_nnz": 827456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 819584, "linear_attention_total": 2359296, "linear_attention_nnz": 514176, "linear_dense_total": 4718592, "linear_dense_nnz": 305408}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 698368, "linear_attention_total": 2359296, "linear_attention_nnz": 398848, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 561536, "linear_attention_total": 2359296, "linear_attention_nnz": 262976, "linear_dense_total": 4718592, "linear_dense_nnz": 298560}}, "total_sparsity": 50.418192374314394, "linear_sparsity": 64.61724175347221}, "speed": {"eval_elapsed_time": 36.97127141384408, "cuda_eval_elapsed_time": 29.553753234863283}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.02730364897265}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53972650, "linear_total": 84934656, "linear_nnz": 30033664, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2736256, "linear_attention_total": 2359296, "linear_attention_nnz": 633664, "linear_dense_total": 4718592, "linear_dense_nnz": 2102592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2981952, "linear_attention_total": 2359296, "linear_attention_nnz": 662336, "linear_dense_total": 4718592, "linear_dense_nnz": 2319616}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3611840, "linear_attention_total": 2359296, "linear_attention_nnz": 975296, "linear_dense_total": 4718592, "linear_dense_nnz": 2636544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788096, "linear_attention_total": 2359296, "linear_attention_nnz": 1107968, "linear_dense_total": 4718592, "linear_dense_nnz": 2680128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3871872, "linear_attention_total": 2359296, "linear_attention_nnz": 1247936, "linear_dense_total": 4718592, "linear_dense_nnz": 2623936}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3740096, "linear_attention_total": 2359296, "linear_attention_nnz": 1181888, "linear_dense_total": 4718592, "linear_dense_nnz": 2558208}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3147520, "linear_attention_total": 2359296, "linear_attention_nnz": 1015040, "linear_dense_total": 4718592, "linear_dense_nnz": 2132480}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437120, "linear_attention_total": 2359296, "linear_attention_nnz": 913792, "linear_dense_total": 4718592, "linear_dense_nnz": 1523328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646016, "linear_attention_total": 2359296, "linear_attention_nnz": 818752, "linear_dense_total": 4718592, "linear_dense_nnz": 827264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 819008, "linear_attention_total": 2359296, "linear_attention_nnz": 514368, "linear_dense_total": 4718592, "linear_dense_nnz": 304640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693888, "linear_attention_total": 2359296, "linear_attention_nnz": 396032, "linear_dense_total": 4718592, "linear_dense_nnz": 297856}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560000, "linear_attention_total": 2359296, "linear_attention_nnz": 262208, "linear_dense_total": 4718592, "linear_dense_nnz": 297792}}, "total_sparsity": 50.4352365996528, "linear_sparsity": 64.6390938464506}, "speed": {"eval_elapsed_time": 36.84984774328768, "cuda_eval_elapsed_time": 29.255816642761232}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 87.861684752796}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43119238, "linear_total": 84934656, "linear_nnz": 19181376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861936, "linear_attention_total": 2359296, "linear_attention_nnz": 369200, "linear_dense_total": 4718592, "linear_dense_nnz": 1492736}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2108384, "linear_attention_total": 2359296, "linear_attention_nnz": 467520, "linear_dense_total": 4718592, "linear_dense_nnz": 1640864}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418848, "linear_attention_total": 2359296, "linear_attention_nnz": 642896, "linear_dense_total": 4718592, "linear_dense_nnz": 1775952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2503152, "linear_attention_total": 2359296, "linear_attention_nnz": 744752, "linear_dense_total": 4718592, "linear_dense_nnz": 1758400}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2447312, "linear_attention_total": 2359296, "linear_attention_nnz": 774128, "linear_dense_total": 4718592, "linear_dense_nnz": 1673184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218640, "linear_attention_total": 2359296, "linear_attention_nnz": 636736, "linear_dense_total": 4718592, "linear_dense_nnz": 1581904}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927200, "linear_attention_total": 2359296, "linear_attention_nnz": 605744, "linear_dense_total": 4718592, "linear_dense_nnz": 1321456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1454768, "linear_attention_total": 2359296, "linear_attention_nnz": 548160, "linear_dense_total": 4718592, "linear_dense_nnz": 906608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011232, "linear_attention_total": 2359296, "linear_attention_nnz": 486720, "linear_dense_total": 4718592, "linear_dense_nnz": 524512}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 487584, "linear_attention_total": 2359296, "linear_attention_nnz": 307184, "linear_dense_total": 4718592, "linear_dense_nnz": 180400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 415888, "linear_attention_total": 2359296, "linear_attention_nnz": 242752, "linear_dense_total": 4718592, "linear_dense_nnz": 173136}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 326432, "linear_attention_total": 2359296, "linear_attention_nnz": 157440, "linear_dense_total": 4718592, "linear_dense_nnz": 168992}}, "total_sparsity": 60.40226245194075, "linear_sparsity": 77.41631401909721}, "speed": {"eval_elapsed_time": 37.18844554480165, "cuda_eval_elapsed_time": 29.62903995513916}, "opt_eval_metrics": {"exact_match": 79.72563859981078, "f1": 87.37325813950282}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 43114218, "linear_total": 84934656, "linear_nnz": 19176352, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861424, "linear_attention_total": 2359296, "linear_attention_nnz": 369024, "linear_dense_total": 4718592, "linear_dense_nnz": 1492400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2107600, "linear_attention_total": 2359296, "linear_attention_nnz": 467072, "linear_dense_total": 4718592, "linear_dense_nnz": 1640528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2419280, "linear_attention_total": 2359296, "linear_attention_nnz": 643248, "linear_dense_total": 4718592, "linear_dense_nnz": 1776032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502560, "linear_attention_total": 2359296, "linear_attention_nnz": 744560, "linear_dense_total": 4718592, "linear_dense_nnz": 1758000}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446544, "linear_attention_total": 2359296, "linear_attention_nnz": 773760, "linear_dense_total": 4718592, "linear_dense_nnz": 1672784}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2217776, "linear_attention_total": 2359296, "linear_attention_nnz": 636208, "linear_dense_total": 4718592, "linear_dense_nnz": 1581568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1926704, "linear_attention_total": 2359296, "linear_attention_nnz": 605664, "linear_dense_total": 4718592, "linear_dense_nnz": 1321040}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1454544, "linear_attention_total": 2359296, "linear_attention_nnz": 548160, "linear_dense_total": 4718592, "linear_dense_nnz": 906384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010816, "linear_attention_total": 2359296, "linear_attention_nnz": 486464, "linear_dense_total": 4718592, "linear_dense_nnz": 524352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 487408, "linear_attention_total": 2359296, "linear_attention_nnz": 306864, "linear_dense_total": 4718592, "linear_dense_nnz": 180544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 415616, "linear_attention_total": 2359296, "linear_attention_nnz": 242352, "linear_dense_total": 4718592, "linear_dense_nnz": 173264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 326080, "linear_attention_total": 2359296, "linear_attention_nnz": 157280, "linear_dense_total": 4718592, "linear_dense_nnz": 168800}}, "total_sparsity": 60.40687247409585, "linear_sparsity": 77.42222915461035}, "speed": {"eval_elapsed_time": 37.211166836321354, "cuda_eval_elapsed_time": 29.662232711791994}, "opt_eval_metrics": {"exact_match": 79.77294228949859, "f1": 87.35885990249378}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 34403512, "linear_total": 84934656, "linear_nnz": 10498048, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 722432, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 290304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 948736, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 459264}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136128, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362944, "linear_attention_total": 2359296, "linear_attention_nnz": 779264, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1240576, "linear_attention_total": 2359296, "linear_attention_nnz": 575488, "linear_dense_total": 4718592, "linear_dense_nnz": 665088}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 614400}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1054720, "linear_attention_total": 2359296, "linear_attention_nnz": 590848, "linear_dense_total": 4718592, "linear_dense_nnz": 463872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066496, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623104, "linear_attention_total": 2359296, "linear_attention_nnz": 388096, "linear_dense_total": 4718592, "linear_dense_nnz": 235008}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 452608, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 382976, "linear_attention_total": 2359296, "linear_attention_nnz": 309248, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 352256, "linear_attention_total": 2359296, "linear_attention_nnz": 198656, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}}, "total_sparsity": 68.40618475429675, "linear_sparsity": 87.63985339506173}, "speed": {"eval_elapsed_time": 20.856850353069603, "cuda_eval_elapsed_time": 13.499527885437011}, "opt_eval_metrics": {"exact_match": 77.11447492904446, "f1": 85.59611837921153}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34421912, "linear_total": 84934656, "linear_nnz": 10516480, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 741888, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 290304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 954880, "linear_attention_total": 2359296, "linear_attention_nnz": 495616, "linear_dense_total": 4718592, "linear_dense_nnz": 459264}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1141248, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1373184, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247744, "linear_attention_total": 2359296, "linear_attention_nnz": 582656, "linear_dense_total": 4718592, "linear_dense_nnz": 665088}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1163264, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 614400}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 463872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1085952, "linear_attention_total": 2359296, "linear_attention_nnz": 715776, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 610816, "linear_attention_total": 2359296, "linear_attention_nnz": 375808, "linear_dense_total": 4718592, "linear_dense_nnz": 235008}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 347136, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 370688, "linear_attention_total": 2359296, "linear_attention_nnz": 296960, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 348160, "linear_attention_total": 2359296, "linear_attention_nnz": 194560, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}}, "total_sparsity": 68.3892874619354, "linear_sparsity": 87.61815200617285}, "speed": {"eval_elapsed_time": 20.86975116888061, "cuda_eval_elapsed_time": 13.492438529968261}, "opt_eval_metrics": {"exact_match": 77.37937559129612, "f1": 85.69020560735045}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63685078, "linear_total": 84934656, "linear_nnz": 39741824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3854752, "linear_attention_total": 2359296, "linear_attention_nnz": 261808, "linear_dense_total": 4718592, "linear_dense_nnz": 3592944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4073232, "linear_attention_total": 2359296, "linear_attention_nnz": 407856, "linear_dense_total": 4718592, "linear_dense_nnz": 3665376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4218016, "linear_attention_total": 2359296, "linear_attention_nnz": 470352, "linear_dense_total": 4718592, "linear_dense_nnz": 3747664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4318192, "linear_attention_total": 2359296, "linear_attention_nnz": 586320, "linear_dense_total": 4718592, "linear_dense_nnz": 3731872}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4294272, "linear_attention_total": 2359296, "linear_attention_nnz": 598112, "linear_dense_total": 4718592, "linear_dense_nnz": 3696160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4191568, "linear_attention_total": 2359296, "linear_attention_nnz": 540976, "linear_dense_total": 4718592, "linear_dense_nnz": 3650592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4018960, "linear_attention_total": 2359296, "linear_attention_nnz": 518320, "linear_dense_total": 4718592, "linear_dense_nnz": 3500640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3692480, "linear_attention_total": 2359296, "linear_attention_nnz": 494608, "linear_dense_total": 4718592, "linear_dense_nnz": 3197872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3064736, "linear_attention_total": 2359296, "linear_attention_nnz": 381872, "linear_dense_total": 4718592, "linear_dense_nnz": 2682864}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1882688, "linear_attention_total": 2359296, "linear_attention_nnz": 281888, "linear_dense_total": 4718592, "linear_dense_nnz": 1600800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307728, "linear_attention_total": 2359296, "linear_attention_nnz": 212544, "linear_dense_total": 4718592, "linear_dense_nnz": 1095184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 825200, "linear_attention_total": 2359296, "linear_attention_nnz": 122704, "linear_dense_total": 4718592, "linear_dense_nnz": 702496}}, "total_sparsity": 41.51601184669167, "linear_sparsity": 53.208942177854944}, "speed": {"eval_elapsed_time": 40.03914254019037, "cuda_eval_elapsed_time": 32.3459995803833}, "opt_eval_metrics": {"exact_match": 80.52980132450331, "f1": 88.02284574429551}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63495382, "linear_total": 84934656, "linear_nnz": 39552208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3842976, "linear_attention_total": 2359296, "linear_attention_nnz": 258016, "linear_dense_total": 4718592, "linear_dense_nnz": 3584960}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4064144, "linear_attention_total": 2359296, "linear_attention_nnz": 404784, "linear_dense_total": 4718592, "linear_dense_nnz": 3659360}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4202080, "linear_attention_total": 2359296, "linear_attention_nnz": 460752, "linear_dense_total": 4718592, "linear_dense_nnz": 3741328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4301216, "linear_attention_total": 2359296, "linear_attention_nnz": 577184, "linear_dense_total": 4718592, "linear_dense_nnz": 3724032}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4277440, "linear_attention_total": 2359296, "linear_attention_nnz": 587792, "linear_dense_total": 4718592, "linear_dense_nnz": 3689648}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4172464, "linear_attention_total": 2359296, "linear_attention_nnz": 530480, "linear_dense_total": 4718592, "linear_dense_nnz": 3641984}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3999744, "linear_attention_total": 2359296, "linear_attention_nnz": 508336, "linear_dense_total": 4718592, "linear_dense_nnz": 3491408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3673360, "linear_attention_total": 2359296, "linear_attention_nnz": 486304, "linear_dense_total": 4718592, "linear_dense_nnz": 3187056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3043376, "linear_attention_total": 2359296, "linear_attention_nnz": 374032, "linear_dense_total": 4718592, "linear_dense_nnz": 2669344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1863968, "linear_attention_total": 2359296, "linear_attention_nnz": 276992, "linear_dense_total": 4718592, "linear_dense_nnz": 1586976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293056, "linear_attention_total": 2359296, "linear_attention_nnz": 209136, "linear_dense_total": 4718592, "linear_dense_nnz": 1083920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818384, "linear_attention_total": 2359296, "linear_attention_nnz": 120976, "linear_dense_total": 4718592, "linear_dense_nnz": 697408}}, "total_sparsity": 41.69021558428826, "linear_sparsity": 53.432191448447156}, "speed": {"eval_elapsed_time": 39.82947535999119, "cuda_eval_elapsed_time": 32.19205239105224}, "opt_eval_metrics": {"exact_match": 80.09460737937559, "f1": 87.80889686617203}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 54109530, "linear_total": 84934656, "linear_nnz": 30171936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3151120, "linear_attention_total": 2359296, "linear_attention_nnz": 172416, "linear_dense_total": 4718592, "linear_dense_nnz": 2978704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411360, "linear_attention_total": 2359296, "linear_attention_nnz": 308192, "linear_dense_total": 4718592, "linear_dense_nnz": 3103168}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3491136, "linear_attention_total": 2359296, "linear_attention_nnz": 285568, "linear_dense_total": 4718592, "linear_dense_nnz": 3205568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3603168, "linear_attention_total": 2359296, "linear_attention_nnz": 437904, "linear_dense_total": 4718592, "linear_dense_nnz": 3165264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3410880, "linear_attention_total": 2359296, "linear_attention_nnz": 321040, "linear_dense_total": 4718592, "linear_dense_nnz": 3089840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3356416, "linear_attention_total": 2359296, "linear_attention_nnz": 332784, "linear_dense_total": 4718592, "linear_dense_nnz": 3023632}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3072896, "linear_attention_total": 2359296, "linear_attention_nnz": 288464, "linear_dense_total": 4718592, "linear_dense_nnz": 2784432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2683232, "linear_attention_total": 2359296, "linear_attention_nnz": 328464, "linear_dense_total": 4718592, "linear_dense_nnz": 2354768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1957200, "linear_attention_total": 2359296, "linear_attention_nnz": 204832, "linear_dense_total": 4718592, "linear_dense_nnz": 1752368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 905552, "linear_attention_total": 2359296, "linear_attention_nnz": 189616, "linear_dense_total": 4718592, "linear_dense_nnz": 715936}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667120, "linear_attention_total": 2359296, "linear_attention_nnz": 140384, "linear_dense_total": 4718592, "linear_dense_nnz": 526736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461856, "linear_attention_total": 2359296, "linear_attention_nnz": 84608, "linear_dense_total": 4718592, "linear_dense_nnz": 377248}}, "total_sparsity": 50.30953543778212, "linear_sparsity": 64.47629575376158}, "speed": {"eval_elapsed_time": 36.13367621740326, "cuda_eval_elapsed_time": 28.556625274658202}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.31499809166372}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53891686, "linear_total": 84934656, "linear_nnz": 29954112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3130496, "linear_attention_total": 2359296, "linear_attention_nnz": 169136, "linear_dense_total": 4718592, "linear_dense_nnz": 2961360}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3393488, "linear_attention_total": 2359296, "linear_attention_nnz": 304464, "linear_dense_total": 4718592, "linear_dense_nnz": 3089024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3470880, "linear_attention_total": 2359296, "linear_attention_nnz": 279216, "linear_dense_total": 4718592, "linear_dense_nnz": 3191664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3580464, "linear_attention_total": 2359296, "linear_attention_nnz": 429728, "linear_dense_total": 4718592, "linear_dense_nnz": 3150736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3390736, "linear_attention_total": 2359296, "linear_attention_nnz": 314688, "linear_dense_total": 4718592, "linear_dense_nnz": 3076048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3334432, "linear_attention_total": 2359296, "linear_attention_nnz": 326416, "linear_dense_total": 4718592, "linear_dense_nnz": 3008016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3048464, "linear_attention_total": 2359296, "linear_attention_nnz": 281984, "linear_dense_total": 4718592, "linear_dense_nnz": 2766480}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2658992, "linear_attention_total": 2359296, "linear_attention_nnz": 320352, "linear_dense_total": 4718592, "linear_dense_nnz": 2338640}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936656, "linear_attention_total": 2359296, "linear_attention_nnz": 200608, "linear_dense_total": 4718592, "linear_dense_nnz": 1736048}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 892160, "linear_attention_total": 2359296, "linear_attention_nnz": 185008, "linear_dense_total": 4718592, "linear_dense_nnz": 707152}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660320, "linear_attention_total": 2359296, "linear_attention_nnz": 137920, "linear_dense_total": 4718592, "linear_dense_nnz": 522400}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457024, "linear_attention_total": 2359296, "linear_attention_nnz": 82480, "linear_dense_total": 4718592, "linear_dense_nnz": 374544}}, "total_sparsity": 50.50958835936713, "linear_sparsity": 64.7327564380787}, "speed": {"eval_elapsed_time": 36.152482252102345, "cuda_eval_elapsed_time": 28.558930541992186}, "opt_eval_metrics": {"exact_match": 79.06338694418164, "f1": 86.86293366416082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 30.796412555966526, "cuda_eval_elapsed_time": 23.427229469299316}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 30.552880198229104, "cuda_eval_elapsed_time": 23.067204750061034}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 48926434, "linear_total": 84934656, "linear_nnz": 25008128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2018816, "linear_attention_total": 2359296, "linear_attention_nnz": 733184, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2776064, "linear_attention_total": 2359296, "linear_attention_nnz": 1252352, "linear_dense_total": 4718592, "linear_dense_nnz": 1523712}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993664, "linear_attention_total": 2359296, "linear_attention_nnz": 1437696, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3162624, "linear_attention_total": 2359296, "linear_attention_nnz": 1545216, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3089408, "linear_attention_total": 2359296, "linear_attention_nnz": 1574912, "linear_dense_total": 4718592, "linear_dense_nnz": 1514496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2505216, "linear_attention_total": 2359296, "linear_attention_nnz": 1370112, "linear_dense_total": 4718592, "linear_dense_nnz": 1135104}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026496, "linear_attention_total": 2359296, "linear_attention_nnz": 1178624, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665536, "linear_attention_total": 2359296, "linear_attention_nnz": 1190912, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 957440, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 208896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 805888, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904192, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}}, "total_sparsity": 55.069333723048565, "linear_sparsity": 70.55603780864197}, "speed": {"eval_elapsed_time": 27.97396031860262, "cuda_eval_elapsed_time": 20.632953029632567}, "opt_eval_metrics": {"exact_match": 80.79470198675497, "f1": 88.10958975740277}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 48725622, "linear_total": 84934656, "linear_nnz": 24807424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098688, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2731008, "linear_attention_total": 2359296, "linear_attention_nnz": 1225728, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2977280, "linear_attention_total": 2359296, "linear_attention_nnz": 1433600, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3176448, "linear_attention_total": 2359296, "linear_attention_nnz": 1566720, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3081216, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2487808, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2003456, "linear_attention_total": 2359296, "linear_attention_nnz": 1166336, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1617408, "linear_attention_total": 2359296, "linear_attention_nnz": 1148928, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 945664, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 787456, "linear_attention_total": 2359296, "linear_attention_nnz": 624640, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897536, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.25374562922606, "linear_sparsity": 70.79234182098766}, "speed": {"eval_elapsed_time": 27.982159624807537, "cuda_eval_elapsed_time": 20.669778549194337}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 88.07285498416482}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 48790134, "linear_total": 84934656, "linear_nnz": 24871936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2086400, "linear_attention_total": 2359296, "linear_attention_nnz": 978944, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1995264, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 1273344}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2738176, "linear_attention_total": 2359296, "linear_attention_nnz": 1232896, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2998784, "linear_attention_total": 2359296, "linear_attention_nnz": 1455104, "linear_dense_total": 4718592, "linear_dense_nnz": 1543680}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3208192, "linear_attention_total": 2359296, "linear_attention_nnz": 1598464, "linear_dense_total": 4718592, "linear_dense_nnz": 1609728}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3104768, "linear_attention_total": 2359296, "linear_attention_nnz": 1596416, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 1373184, "linear_dense_total": 4718592, "linear_dense_nnz": 1125888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2002432, "linear_attention_total": 2359296, "linear_attention_nnz": 1165312, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1631744, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 468480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 207360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 615424, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 881152, "linear_attention_total": 2359296, "linear_attention_nnz": 403456, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}}, "total_sparsity": 55.19450225287742, "linear_sparsity": 70.71638695987654}, "speed": {"eval_elapsed_time": 28.054355942178518, "cuda_eval_elapsed_time": 20.71169916152954}, "opt_eval_metrics": {"exact_match": 80.70009460737937, "f1": 88.04831949879843}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l20-dl1--2021-01-24--15-46-47/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42955274, "linear_total": 84934656, "linear_nnz": 19026944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1496000, "linear_attention_total": 2359296, "linear_attention_nnz": 407936, "linear_dense_total": 4718592, "linear_dense_nnz": 1088064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1948032, "linear_attention_total": 2359296, "linear_attention_nnz": 569088, "linear_dense_total": 4718592, "linear_dense_nnz": 1378944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466112, "linear_attention_total": 2359296, "linear_attention_nnz": 770560, "linear_dense_total": 4718592, "linear_dense_nnz": 1695552}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583360, "linear_attention_total": 2359296, "linear_attention_nnz": 902848, "linear_dense_total": 4718592, "linear_dense_nnz": 1680512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2537856, "linear_attention_total": 2359296, "linear_attention_nnz": 913216, "linear_dense_total": 4718592, "linear_dense_nnz": 1624640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2284352, "linear_attention_total": 2359296, "linear_attention_nnz": 749440, "linear_dense_total": 4718592, "linear_dense_nnz": 1534912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1875456, "linear_attention_total": 2359296, "linear_attention_nnz": 684480, "linear_dense_total": 4718592, "linear_dense_nnz": 1190976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488192, "linear_attention_total": 2359296, "linear_attention_nnz": 672320, "linear_dense_total": 4718592, "linear_dense_nnz": 815872}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969280, "linear_attention_total": 2359296, "linear_attention_nnz": 570176, "linear_dense_total": 4718592, "linear_dense_nnz": 399104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513408, "linear_attention_total": 2359296, "linear_attention_nnz": 345664, "linear_dense_total": 4718592, "linear_dense_nnz": 167744}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479680, "linear_attention_total": 2359296, "linear_attention_nnz": 298112, "linear_dense_total": 4718592, "linear_dense_nnz": 181568}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 385216, "linear_attention_total": 2359296, "linear_attention_nnz": 185728, "linear_dense_total": 4718592, "linear_dense_nnz": 199488}}, "total_sparsity": 60.55283569350244, "linear_sparsity": 77.59813850308642}, "speed": {"eval_elapsed_time": 32.10200677579269, "cuda_eval_elapsed_time": 24.667898628234862}, "opt_eval_metrics": {"exact_match": 79.05392620624409, "f1": 86.84949475139184}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72930262, "linear_total": 84934656, "linear_nnz": 48982384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408688, "linear_attention_total": 2359296, "linear_attention_nnz": 428592, "linear_dense_total": 4718592, "linear_dense_nnz": 3980096}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4561328, "linear_attention_total": 2359296, "linear_attention_nnz": 545744, "linear_dense_total": 4718592, "linear_dense_nnz": 4015584}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4791104, "linear_attention_total": 2359296, "linear_attention_nnz": 729664, "linear_dense_total": 4718592, "linear_dense_nnz": 4061440}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4914112, "linear_attention_total": 2359296, "linear_attention_nnz": 851472, "linear_dense_total": 4718592, "linear_dense_nnz": 4062640}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5008736, "linear_attention_total": 2359296, "linear_attention_nnz": 960992, "linear_dense_total": 4718592, "linear_dense_nnz": 4047744}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4908864, "linear_attention_total": 2359296, "linear_attention_nnz": 902768, "linear_dense_total": 4718592, "linear_dense_nnz": 4006096}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4781792, "linear_attention_total": 2359296, "linear_attention_nnz": 861120, "linear_dense_total": 4718592, "linear_dense_nnz": 3920672}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4492512, "linear_attention_total": 2359296, "linear_attention_nnz": 759664, "linear_dense_total": 4718592, "linear_dense_nnz": 3732848}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4061488, "linear_attention_total": 2359296, "linear_attention_nnz": 670096, "linear_dense_total": 4718592, "linear_dense_nnz": 3391392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3105840, "linear_attention_total": 2359296, "linear_attention_nnz": 444064, "linear_dense_total": 4718592, "linear_dense_nnz": 2661776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422000, "linear_attention_total": 2359296, "linear_attention_nnz": 329968, "linear_dense_total": 4718592, "linear_dense_nnz": 2092032}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525920, "linear_attention_total": 2359296, "linear_attention_nnz": 190816, "linear_dense_total": 4718592, "linear_dense_nnz": 1335104}}, "total_sparsity": 33.025871793300276, "linear_sparsity": 42.329331386236504}, "speed": {"eval_elapsed_time": 41.87211530236527, "cuda_eval_elapsed_time": 34.2993692779541}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.58172107792693}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35270510, "linear_total": 84934656, "linear_nnz": 11343312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023712, "linear_attention_total": 2359296, "linear_attention_nnz": 246400, "linear_dense_total": 4718592, "linear_dense_nnz": 777312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286416, "linear_attention_total": 2359296, "linear_attention_nnz": 381872, "linear_dense_total": 4718592, "linear_dense_nnz": 904544}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1485456, "linear_attention_total": 2359296, "linear_attention_nnz": 460688, "linear_dense_total": 4718592, "linear_dense_nnz": 1024768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562240, "linear_attention_total": 2359296, "linear_attention_nnz": 556080, "linear_dense_total": 4718592, "linear_dense_nnz": 1006160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407968, "linear_attention_total": 2359296, "linear_attention_nnz": 487760, "linear_dense_total": 4718592, "linear_dense_nnz": 920208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263360, "linear_attention_total": 2359296, "linear_attention_nnz": 403424, "linear_dense_total": 4718592, "linear_dense_nnz": 859936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059616, "linear_attention_total": 2359296, "linear_attention_nnz": 380560, "linear_dense_total": 4718592, "linear_dense_nnz": 679056}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863744, "linear_attention_total": 2359296, "linear_attention_nnz": 400704, "linear_dense_total": 4718592, "linear_dense_nnz": 463040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 550816, "linear_attention_total": 2359296, "linear_attention_nnz": 266832, "linear_dense_total": 4718592, "linear_dense_nnz": 283984}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 327680, "linear_attention_total": 2359296, "linear_attention_nnz": 225120, "linear_dense_total": 4718592, "linear_dense_nnz": 102560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 287936, "linear_attention_total": 2359296, "linear_attention_nnz": 169216, "linear_dense_total": 4718592, "linear_dense_nnz": 118720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 224368, "linear_attention_total": 2359296, "linear_attention_nnz": 113984, "linear_dense_total": 4718592, "linear_dense_nnz": 110384}}, "total_sparsity": 67.60999352154138, "linear_sparsity": 86.64466010199654}, "speed": {"eval_elapsed_time": 32.05906807305291, "cuda_eval_elapsed_time": 24.637864067077636}, "opt_eval_metrics": {"exact_match": 77.84295175023652, "f1": 85.93146728512978}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35101310, "linear_total": 84934656, "linear_nnz": 11174304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1005072, "linear_attention_total": 2359296, "linear_attention_nnz": 244016, "linear_dense_total": 4718592, "linear_dense_nnz": 761056}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265136, "linear_attention_total": 2359296, "linear_attention_nnz": 377744, "linear_dense_total": 4718592, "linear_dense_nnz": 887392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463488, "linear_attention_total": 2359296, "linear_attention_nnz": 453520, "linear_dense_total": 4718592, "linear_dense_nnz": 1009968}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538448, "linear_attention_total": 2359296, "linear_attention_nnz": 549264, "linear_dense_total": 4718592, "linear_dense_nnz": 989184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387552, "linear_attention_total": 2359296, "linear_attention_nnz": 480528, "linear_dense_total": 4718592, "linear_dense_nnz": 907024}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1243792, "linear_attention_total": 2359296, "linear_attention_nnz": 397568, "linear_dense_total": 4718592, "linear_dense_nnz": 846224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044112, "linear_attention_total": 2359296, "linear_attention_nnz": 373968, "linear_dense_total": 4718592, "linear_dense_nnz": 670144}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 851488, "linear_attention_total": 2359296, "linear_attention_nnz": 394160, "linear_dense_total": 4718592, "linear_dense_nnz": 457328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 262368, "linear_dense_total": 4718592, "linear_dense_nnz": 280864}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 323648, "linear_attention_total": 2359296, "linear_attention_nnz": 222176, "linear_dense_total": 4718592, "linear_dense_nnz": 101472}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 284976, "linear_attention_total": 2359296, "linear_attention_nnz": 166848, "linear_dense_total": 4718592, "linear_dense_nnz": 118128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 223360, "linear_attention_total": 2359296, "linear_attention_nnz": 113088, "linear_dense_total": 4718592, "linear_dense_nnz": 110272}}, "total_sparsity": 67.7653751447772, "linear_sparsity": 86.84364601417825}, "speed": {"eval_elapsed_time": 32.0392144843936, "cuda_eval_elapsed_time": 24.631753623962403}, "opt_eval_metrics": {"exact_match": 77.47398297067171, "f1": 85.88482767255138}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 35095854, "linear_total": 84934656, "linear_nnz": 11168864, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004320, "linear_attention_total": 2359296, "linear_attention_nnz": 244080, "linear_dense_total": 4718592, "linear_dense_nnz": 760240}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264816, "linear_attention_total": 2359296, "linear_attention_nnz": 377328, "linear_dense_total": 4718592, "linear_dense_nnz": 887488}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463360, "linear_attention_total": 2359296, "linear_attention_nnz": 453680, "linear_dense_total": 4718592, "linear_dense_nnz": 1009680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1537232, "linear_attention_total": 2359296, "linear_attention_nnz": 549056, "linear_dense_total": 4718592, "linear_dense_nnz": 988176}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386720, "linear_attention_total": 2359296, "linear_attention_nnz": 480112, "linear_dense_total": 4718592, "linear_dense_nnz": 906608}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244032, "linear_attention_total": 2359296, "linear_attention_nnz": 397488, "linear_dense_total": 4718592, "linear_dense_nnz": 846544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1043552, "linear_attention_total": 2359296, "linear_attention_nnz": 373632, "linear_dense_total": 4718592, "linear_dense_nnz": 669920}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 850736, "linear_attention_total": 2359296, "linear_attention_nnz": 393728, "linear_dense_total": 4718592, "linear_dense_nnz": 457008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543088, "linear_attention_total": 2359296, "linear_attention_nnz": 262272, "linear_dense_total": 4718592, "linear_dense_nnz": 280816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 323184, "linear_attention_total": 2359296, "linear_attention_nnz": 221824, "linear_dense_total": 4718592, "linear_dense_nnz": 101360}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 284528, "linear_attention_total": 2359296, "linear_attention_nnz": 166640, "linear_dense_total": 4718592, "linear_dense_nnz": 117888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 223296, "linear_attention_total": 2359296, "linear_attention_nnz": 113056, "linear_dense_total": 4718592, "linear_dense_nnz": 110240}}, "total_sparsity": 67.77038555929478, "linear_sparsity": 86.85005093798226}, "speed": {"eval_elapsed_time": 32.04897632403299, "cuda_eval_elapsed_time": 24.588402084350587}, "opt_eval_metrics": {"exact_match": 77.4077578051088, "f1": 85.78500582028688}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53211146, "linear_total": 84934656, "linear_nnz": 29278080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2993984, "linear_attention_total": 2359296, "linear_attention_nnz": 241280, "linear_dense_total": 4718592, "linear_dense_nnz": 2752704}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3330688, "linear_attention_total": 2359296, "linear_attention_nnz": 379584, "linear_dense_total": 4718592, "linear_dense_nnz": 2951104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3517120, "linear_attention_total": 2359296, "linear_attention_nnz": 322880, "linear_dense_total": 4718592, "linear_dense_nnz": 3194240}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720576, "linear_attention_total": 2359296, "linear_attention_nnz": 565440, "linear_dense_total": 4718592, "linear_dense_nnz": 3155136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3455168, "linear_attention_total": 2359296, "linear_attention_nnz": 390400, "linear_dense_total": 4718592, "linear_dense_nnz": 3064768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3400192, "linear_attention_total": 2359296, "linear_attention_nnz": 406592, "linear_dense_total": 4718592, "linear_dense_nnz": 2993600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2988160, "linear_attention_total": 2359296, "linear_attention_nnz": 356480, "linear_dense_total": 4718592, "linear_dense_nnz": 2631680}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2477696, "linear_attention_total": 2359296, "linear_attention_nnz": 409920, "linear_dense_total": 4718592, "linear_dense_nnz": 2067776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1612416, "linear_attention_total": 2359296, "linear_attention_nnz": 242048, "linear_dense_total": 4718592, "linear_dense_nnz": 1370368}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697664, "linear_attention_total": 2359296, "linear_attention_nnz": 224896, "linear_dense_total": 4718592, "linear_dense_nnz": 472768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591360, "linear_attention_total": 2359296, "linear_attention_nnz": 172352, "linear_dense_total": 4718592, "linear_dense_nnz": 419008}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 493056, "linear_attention_total": 2359296, "linear_attention_nnz": 104768, "linear_dense_total": 4718592, "linear_dense_nnz": 388288}}, "total_sparsity": 51.13454941064908, "linear_sparsity": 65.52870008680556}, "speed": {"eval_elapsed_time": 33.4375456799753, "cuda_eval_elapsed_time": 25.933858947753908}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.75922108224064}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-25--2021-01-23--20-20-19/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 37255475, "linear_total": 84934656, "linear_nnz": 13348352, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1053184, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 597504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1218560, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 854016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1503232, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 749568, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626112, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 1047552}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1592320, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1322496, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 775680}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1302016, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678400, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 342528}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 493568, "linear_attention_total": 2359296, "linear_attention_nnz": 358400, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 404992, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 439296, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 276480}}, "total_sparsity": 65.78713841653968, "linear_sparsity": 84.28397472993827}, "speed": {"eval_elapsed_time": 21.72890411503613, "cuda_eval_elapsed_time": 14.394198833465577}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 86.20063710644014}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 36.364678455051035, "cuda_eval_elapsed_time": 28.9650231628418}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 35.16166925104335, "cuda_eval_elapsed_time": 27.83310959625244}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 35.3477450478822, "cuda_eval_elapsed_time": 27.96729845428467}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 39.6229472043924, "cuda_eval_elapsed_time": 32.23066467285156}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 39.686994375661016, "cuda_eval_elapsed_time": 32.31462289428711}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l40-dl1--2021-01-24--15-47-15/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 35298682, "linear_total": 84934656, "linear_nnz": 11382272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 850432, "linear_attention_total": 2359296, "linear_attention_nnz": 330432, "linear_dense_total": 4718592, "linear_dense_nnz": 520000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193088, "linear_attention_total": 2359296, "linear_attention_nnz": 468224, "linear_dense_total": 4718592, "linear_dense_nnz": 724864}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486784, "linear_attention_total": 2359296, "linear_attention_nnz": 511104, "linear_dense_total": 4718592, "linear_dense_nnz": 975680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1596224, "linear_attention_total": 2359296, "linear_attention_nnz": 688192, "linear_dense_total": 4718592, "linear_dense_nnz": 908032}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1414656, "linear_attention_total": 2359296, "linear_attention_nnz": 551360, "linear_dense_total": 4718592, "linear_dense_nnz": 863296}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253632, "linear_attention_total": 2359296, "linear_attention_nnz": 466304, "linear_dense_total": 4718592, "linear_dense_nnz": 787328}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147328, "linear_attention_total": 2359296, "linear_attention_nnz": 451840, "linear_dense_total": 4718592, "linear_dense_nnz": 695488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973760, "linear_attention_total": 2359296, "linear_attention_nnz": 497920, "linear_dense_total": 4718592, "linear_dense_nnz": 475840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520128, "linear_attention_total": 2359296, "linear_attention_nnz": 302528, "linear_dense_total": 4718592, "linear_dense_nnz": 217600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 346432, "linear_attention_total": 2359296, "linear_attention_nnz": 255168, "linear_dense_total": 4718592, "linear_dense_nnz": 91264}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 344000, "linear_attention_total": 2359296, "linear_attention_nnz": 206912, "linear_dense_total": 4718592, "linear_dense_nnz": 137088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255808, "linear_attention_total": 2359296, "linear_attention_nnz": 127744, "linear_dense_total": 4718592, "linear_dense_nnz": 128064}}, "total_sparsity": 67.5841222976064, "linear_sparsity": 86.59878954475309}, "speed": {"eval_elapsed_time": 26.43846725206822, "cuda_eval_elapsed_time": 19.238733966827393}, "opt_eval_metrics": {"exact_match": 76.87795648060549, "f1": 85.16652519097626}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 63788226, "linear_total": 84934656, "linear_nnz": 39853312, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3867392, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 3448576}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250112, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 3696384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519424, "linear_attention_total": 2359296, "linear_attention_nnz": 562432, "linear_dense_total": 4718592, "linear_dense_nnz": 3956992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4809728, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 3982336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4702976, "linear_attention_total": 2359296, "linear_attention_nnz": 790016, "linear_dense_total": 4718592, "linear_dense_nnz": 3912960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4601344, "linear_attention_total": 2359296, "linear_attention_nnz": 701696, "linear_dense_total": 4718592, "linear_dense_nnz": 3899648}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 667392, "linear_dense_total": 4718592, "linear_dense_nnz": 3576064}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3669248, "linear_attention_total": 2359296, "linear_attention_nnz": 700416, "linear_dense_total": 4718592, "linear_dense_nnz": 2968832}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2404096, "linear_attention_total": 2359296, "linear_attention_nnz": 437504, "linear_dense_total": 4718592, "linear_dense_nnz": 1966592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1054976, "linear_attention_total": 2359296, "linear_attention_nnz": 361472, "linear_dense_total": 4718592, "linear_dense_nnz": 693504}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946176, "linear_attention_total": 2359296, "linear_attention_nnz": 291584, "linear_dense_total": 4718592, "linear_dense_nnz": 654592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 168960, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 41.42128782970864, "linear_sparsity": 53.077678915895056}, "speed": {"eval_elapsed_time": 33.69302155217156, "cuda_eval_elapsed_time": 26.3544778213501}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.51569063636161}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 63672482, "linear_total": 84934656, "linear_nnz": 39737600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3861248, "linear_attention_total": 2359296, "linear_attention_nnz": 416256, "linear_dense_total": 4718592, "linear_dense_nnz": 3444992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4250368, "linear_attention_total": 2359296, "linear_attention_nnz": 541952, "linear_dense_total": 4718592, "linear_dense_nnz": 3708416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4532736, "linear_attention_total": 2359296, "linear_attention_nnz": 555520, "linear_dense_total": 4718592, "linear_dense_nnz": 3977216}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822784, "linear_attention_total": 2359296, "linear_attention_nnz": 802816, "linear_dense_total": 4718592, "linear_dense_nnz": 4019968}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4714240, "linear_attention_total": 2359296, "linear_attention_nnz": 774400, "linear_dense_total": 4718592, "linear_dense_nnz": 3939840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4584192, "linear_attention_total": 2359296, "linear_attention_nnz": 686592, "linear_dense_total": 4718592, "linear_dense_nnz": 3897600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4225024, "linear_attention_total": 2359296, "linear_attention_nnz": 656384, "linear_dense_total": 4718592, "linear_dense_nnz": 3568640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3634176, "linear_attention_total": 2359296, "linear_attention_nnz": 676864, "linear_dense_total": 4718592, "linear_dense_nnz": 2957312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 432640, "linear_dense_total": 4718592, "linear_dense_nnz": 1931264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030400, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933120, "linear_attention_total": 2359296, "linear_attention_nnz": 285184, "linear_dense_total": 4718592, "linear_dense_nnz": 647936}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785408, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 611328}}, "total_sparsity": 41.52757914531035, "linear_sparsity": 53.213915412808646}, "speed": {"eval_elapsed_time": 33.60846929671243, "cuda_eval_elapsed_time": 26.32847610473633}, "opt_eval_metrics": {"exact_match": 79.55534531693472, "f1": 87.439750439335}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 63651698, "linear_total": 84934656, "linear_nnz": 39716864, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3864832, "linear_attention_total": 2359296, "linear_attention_nnz": 417024, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4246016, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 3703296}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4537600, "linear_attention_total": 2359296, "linear_attention_nnz": 555776, "linear_dense_total": 4718592, "linear_dense_nnz": 3981824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4824576, "linear_attention_total": 2359296, "linear_attention_nnz": 810240, "linear_dense_total": 4718592, "linear_dense_nnz": 4014336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4704768, "linear_attention_total": 2359296, "linear_attention_nnz": 764160, "linear_dense_total": 4718592, "linear_dense_nnz": 3940608}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4590080, "linear_attention_total": 2359296, "linear_attention_nnz": 685824, "linear_dense_total": 4718592, "linear_dense_nnz": 3904256}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219136, "linear_attention_total": 2359296, "linear_attention_nnz": 647680, "linear_dense_total": 4718592, "linear_dense_nnz": 3571456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640576, "linear_attention_total": 2359296, "linear_attention_nnz": 684288, "linear_dense_total": 4718592, "linear_dense_nnz": 2956288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2360064, "linear_attention_total": 2359296, "linear_attention_nnz": 427264, "linear_dense_total": 4718592, "linear_dense_nnz": 1932800}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033472, "linear_attention_total": 2359296, "linear_attention_nnz": 350976, "linear_dense_total": 4718592, "linear_dense_nnz": 682496}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 920832, "linear_attention_total": 2359296, "linear_attention_nnz": 273408, "linear_dense_total": 4718592, "linear_dense_nnz": 647424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 774912, "linear_attention_total": 2359296, "linear_attention_nnz": 166400, "linear_dense_total": 4718592, "linear_dense_nnz": 608512}}, "total_sparsity": 41.546665739029805, "linear_sparsity": 53.238329475308646}, "speed": {"eval_elapsed_time": 33.70636031124741, "cuda_eval_elapsed_time": 26.343981628417968}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.29496050765553}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 38271273, "linear_total": 84934656, "linear_nnz": 14360064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 991744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 965120, "linear_attention_total": 2359296, "linear_attention_nnz": 748544, "linear_dense_total": 4718592, "linear_dense_nnz": 216576}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487360, "linear_attention_total": 2359296, "linear_attention_nnz": 1163264, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 1389568, "linear_dense_total": 4718592, "linear_dense_nnz": 377856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 1449984, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1713664, "linear_attention_total": 2359296, "linear_attention_nnz": 1349632, "linear_dense_total": 4718592, "linear_dense_nnz": 364032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1481216, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1190400, "linear_attention_total": 2359296, "linear_attention_nnz": 964608, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191424, "linear_attention_total": 2359296, "linear_attention_nnz": 1063936, "linear_dense_total": 4718592, "linear_dense_nnz": 127488}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708608, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556544, "linear_attention_total": 2359296, "linear_attention_nnz": 502784, "linear_dense_total": 4718592, "linear_dense_nnz": 53760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 441856, "linear_attention_total": 2359296, "linear_attention_nnz": 360448, "linear_dense_total": 4718592, "linear_dense_nnz": 81408}}, "total_sparsity": 64.85429951512302, "linear_sparsity": 83.0928096064815}, "speed": {"eval_elapsed_time": 24.480217491276562, "cuda_eval_elapsed_time": 17.252509830474853}, "opt_eval_metrics": {"exact_match": 78.67549668874172, "f1": 86.51098653495667}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 37.101448519621044, "cuda_eval_elapsed_time": 29.73566310119629}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 37.11843426898122, "cuda_eval_elapsed_time": 29.759838722229006}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 34.77788851317018, "cuda_eval_elapsed_time": 27.427432876586913}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 34.82450146274641, "cuda_eval_elapsed_time": 27.48367044067383}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 34.80613293591887, "cuda_eval_elapsed_time": 27.478721130371095}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 36.33264479693025, "cuda_eval_elapsed_time": 29.016168815612794}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 36.40407280996442, "cuda_eval_elapsed_time": 29.089330375671388}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 36.50873678829521, "cuda_eval_elapsed_time": 29.182387649536132}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36724619, "linear_total": 84934656, "linear_nnz": 12816896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 933888, "linear_attention_total": 2359296, "linear_attention_nnz": 522240, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1116160, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1374720, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692160, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1659392, "linear_attention_total": 2359296, "linear_attention_nnz": 825344, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1416192, "linear_attention_total": 2359296, "linear_attention_nnz": 672768, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1207296, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1235456, "linear_attention_total": 2359296, "linear_attention_nnz": 785408, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 778240, "linear_attention_total": 2359296, "linear_attention_nnz": 514048, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455168, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 421888, "linear_attention_total": 2359296, "linear_attention_nnz": 222208, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.2746399944621, "linear_sparsity": 84.9096981095679}, "speed": {"eval_elapsed_time": 21.897933847736567, "cuda_eval_elapsed_time": 14.760263885498047}, "opt_eval_metrics": {"exact_match": 78.3349101229896, "f1": 86.4116267700138}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 36711275, "linear_total": 84934656, "linear_nnz": 12803584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 930816, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 536576, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1366528, "linear_attention_total": 2359296, "linear_attention_nnz": 667648, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1681920, "linear_attention_total": 2359296, "linear_attention_nnz": 967680, "linear_dense_total": 4718592, "linear_dense_nnz": 714240}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1412096, "linear_attention_total": 2359296, "linear_attention_nnz": 668672, "linear_dense_total": 4718592, "linear_dense_nnz": 743424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1221632, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1237504, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757760, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 526336, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 101376}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443904, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 425984, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}}, "total_sparsity": 66.28689420474849, "linear_sparsity": 84.92537133487654}, "speed": {"eval_elapsed_time": 21.86237431317568, "cuda_eval_elapsed_time": 14.746898548126222}, "opt_eval_metrics": {"exact_match": 78.37275307473983, "f1": 86.39441106336629}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72403618, "linear_total": 84934656, "linear_nnz": 48458624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451008, "linear_attention_total": 2359296, "linear_attention_nnz": 446336, "linear_dense_total": 4718592, "linear_dense_nnz": 4004672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674880, "linear_attention_total": 2359296, "linear_attention_nnz": 597248, "linear_dense_total": 4718592, "linear_dense_nnz": 4077632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4954368, "linear_attention_total": 2359296, "linear_attention_nnz": 799296, "linear_dense_total": 4718592, "linear_dense_nnz": 4155072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5115648, "linear_attention_total": 2359296, "linear_attention_nnz": 950208, "linear_dense_total": 4718592, "linear_dense_nnz": 4165440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5174848, "linear_attention_total": 2359296, "linear_attention_nnz": 1022400, "linear_dense_total": 4718592, "linear_dense_nnz": 4152448}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5020992, "linear_attention_total": 2359296, "linear_attention_nnz": 914368, "linear_dense_total": 4718592, "linear_dense_nnz": 4106624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4879296, "linear_attention_total": 2359296, "linear_attention_nnz": 918208, "linear_dense_total": 4718592, "linear_dense_nnz": 3961088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4517696, "linear_attention_total": 2359296, "linear_attention_nnz": 832704, "linear_dense_total": 4718592, "linear_dense_nnz": 3684992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3922688, "linear_attention_total": 2359296, "linear_attention_nnz": 715648, "linear_dense_total": 4718592, "linear_dense_nnz": 3207040}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584512, "linear_attention_total": 2359296, "linear_attention_nnz": 467072, "linear_dense_total": 4718592, "linear_dense_nnz": 2117440}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879872, "linear_attention_total": 2359296, "linear_attention_nnz": 362688, "linear_dense_total": 4718592, "linear_dense_nnz": 1517184}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282816, "linear_attention_total": 2359296, "linear_attention_nnz": 218432, "linear_dense_total": 4718592, "linear_dense_nnz": 1064384}}, "total_sparsity": 33.50950536060172, "linear_sparsity": 42.94599368248457}, "speed": {"eval_elapsed_time": 39.62965265568346, "cuda_eval_elapsed_time": 32.22343955230713}, "opt_eval_metrics": {"exact_match": 81.10690633869442, "f1": 88.3744311515211}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72395170, "linear_total": 84934656, "linear_nnz": 48450176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4450944, "linear_attention_total": 2359296, "linear_attention_nnz": 446080, "linear_dense_total": 4718592, "linear_dense_nnz": 4004864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4674240, "linear_attention_total": 2359296, "linear_attention_nnz": 597312, "linear_dense_total": 4718592, "linear_dense_nnz": 4076928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4955648, "linear_attention_total": 2359296, "linear_attention_nnz": 800192, "linear_dense_total": 4718592, "linear_dense_nnz": 4155456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5114624, "linear_attention_total": 2359296, "linear_attention_nnz": 948864, "linear_dense_total": 4718592, "linear_dense_nnz": 4165760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5171840, "linear_attention_total": 2359296, "linear_attention_nnz": 1019200, "linear_dense_total": 4718592, "linear_dense_nnz": 4152640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5023808, "linear_attention_total": 2359296, "linear_attention_nnz": 915392, "linear_dense_total": 4718592, "linear_dense_nnz": 4108416}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4876544, "linear_attention_total": 2359296, "linear_attention_nnz": 916160, "linear_dense_total": 4718592, "linear_dense_nnz": 3960384}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4519232, "linear_attention_total": 2359296, "linear_attention_nnz": 834176, "linear_dense_total": 4718592, "linear_dense_nnz": 3685056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3921792, "linear_attention_total": 2359296, "linear_attention_nnz": 713856, "linear_dense_total": 4718592, "linear_dense_nnz": 3207936}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2581056, "linear_attention_total": 2359296, "linear_attention_nnz": 465600, "linear_dense_total": 4718592, "linear_dense_nnz": 2115456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879424, "linear_attention_total": 2359296, "linear_attention_nnz": 362048, "linear_dense_total": 4718592, "linear_dense_nnz": 1517376}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 217216, "linear_dense_total": 4718592, "linear_dense_nnz": 1063808}}, "total_sparsity": 33.51726342179023, "linear_sparsity": 42.95594015239198}, "speed": {"eval_elapsed_time": 39.64649308426306, "cuda_eval_elapsed_time": 32.25489320373535}, "opt_eval_metrics": {"exact_match": 80.82308420056765, "f1": 88.21300800880684}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 44702229, "linear_total": 84934656, "linear_nnz": 20786688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1582592, "linear_attention_total": 2359296, "linear_attention_nnz": 1055744, "linear_dense_total": 4718592, "linear_dense_nnz": 526848}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 752640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190848, "linear_attention_total": 2359296, "linear_attention_nnz": 1316864, "linear_dense_total": 4718592, "linear_dense_nnz": 873984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2420736, "linear_attention_total": 2359296, "linear_attention_nnz": 1468416, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2697728, "linear_attention_total": 2359296, "linear_attention_nnz": 1651712, "linear_dense_total": 4718592, "linear_dense_nnz": 1046016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2603008, "linear_attention_total": 2359296, "linear_attention_nnz": 1616896, "linear_dense_total": 4718592, "linear_dense_nnz": 986112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102272, "linear_attention_total": 2359296, "linear_attention_nnz": 1361920, "linear_dense_total": 4718592, "linear_dense_nnz": 740352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 1265664, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 1212416, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863232, "linear_attention_total": 2359296, "linear_attention_nnz": 749568, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 652288, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682496, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}}, "total_sparsity": 58.94855257518133, "linear_sparsity": 75.52625868055556}, "speed": {"eval_elapsed_time": 26.718373194802552, "cuda_eval_elapsed_time": 19.595643711090087}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 88.06903108265608}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 72878482, "linear_total": 84934656, "linear_nnz": 48937216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4586496, "linear_attention_total": 2359296, "linear_attention_nnz": 517888, "linear_dense_total": 4718592, "linear_dense_nnz": 4068608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4844288, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 4202752}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5155328, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 4313856}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5409024, "linear_attention_total": 2359296, "linear_attention_nnz": 1072896, "linear_dense_total": 4718592, "linear_dense_nnz": 4336128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5385984, "linear_attention_total": 2359296, "linear_attention_nnz": 1068800, "linear_dense_total": 4718592, "linear_dense_nnz": 4317184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5272832, "linear_attention_total": 2359296, "linear_attention_nnz": 961792, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5128448, "linear_attention_total": 2359296, "linear_attention_nnz": 986880, "linear_dense_total": 4718592, "linear_dense_nnz": 4141568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4725504, "linear_attention_total": 2359296, "linear_attention_nnz": 905472, "linear_dense_total": 4718592, "linear_dense_nnz": 3820032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3841792, "linear_attention_total": 2359296, "linear_attention_nnz": 756224, "linear_dense_total": 4718592, "linear_dense_nnz": 3085568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879808, "linear_attention_total": 2359296, "linear_attention_nnz": 463360, "linear_dense_total": 4718592, "linear_dense_nnz": 1416448}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1505792, "linear_attention_total": 2359296, "linear_attention_nnz": 415488, "linear_dense_total": 4718592, "linear_dense_nnz": 1090304}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1201920, "linear_attention_total": 2359296, "linear_attention_nnz": 254720, "linear_dense_total": 4718592, "linear_dense_nnz": 947200}}, "total_sparsity": 33.07342297799975, "linear_sparsity": 42.38251109182099}, "speed": {"eval_elapsed_time": 37.54532916797325, "cuda_eval_elapsed_time": 30.13610975646973}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.34112193061533}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34772839, "linear_total": 84934656, "linear_nnz": 10866176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732160, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835072, "linear_attention_total": 2359296, "linear_attention_nnz": 535552, "linear_dense_total": 4718592, "linear_dense_nnz": 299520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1128960, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 407040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1551872, "linear_attention_total": 2359296, "linear_attention_nnz": 1111040, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1389056, "linear_attention_total": 2359296, "linear_attention_nnz": 892928, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1096704, "linear_attention_total": 2359296, "linear_attention_nnz": 663552, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1000448, "linear_attention_total": 2359296, "linear_attention_nnz": 662528, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 801792, "linear_dense_total": 4718592, "linear_dense_nnz": 268800}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 803328, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 498688, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 422912, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 58368}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 336384, "linear_attention_total": 2359296, "linear_attention_nnz": 239616, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 68.06702028169144, "linear_sparsity": 87.20642843364197}, "speed": {"eval_elapsed_time": 21.374552259687334, "cuda_eval_elapsed_time": 14.268565601348877}, "opt_eval_metrics": {"exact_match": 77.8240302743614, "f1": 86.11992485005756}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53223538, "linear_total": 84934656, "linear_nnz": 29295872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2685696, "linear_attention_total": 2359296, "linear_attention_nnz": 331008, "linear_dense_total": 4718592, "linear_dense_nnz": 2354688}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3258624, "linear_attention_total": 2359296, "linear_attention_nnz": 432384, "linear_dense_total": 4718592, "linear_dense_nnz": 2826240}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3726080, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 3302144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3917568, "linear_attention_total": 2359296, "linear_attention_nnz": 669440, "linear_dense_total": 4718592, "linear_dense_nnz": 3248128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3647232, "linear_attention_total": 2359296, "linear_attention_nnz": 453632, "linear_dense_total": 4718592, "linear_dense_nnz": 3193600}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3593472, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 3119616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2939648, "linear_attention_total": 2359296, "linear_attention_nnz": 445952, "linear_dense_total": 4718592, "linear_dense_nnz": 2493696}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2381824, "linear_attention_total": 2359296, "linear_attention_nnz": 490752, "linear_dense_total": 4718592, "linear_dense_nnz": 1891072}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384448, "linear_attention_total": 2359296, "linear_attention_nnz": 275712, "linear_dense_total": 4718592, "linear_dense_nnz": 1108736}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 348928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 618752, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 415744}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 535296, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 423168}}, "total_sparsity": 51.12316945157615, "linear_sparsity": 65.5077522183642}, "speed": {"eval_elapsed_time": 30.60480569722131, "cuda_eval_elapsed_time": 23.35162329864502}, "opt_eval_metrics": {"exact_match": 78.11731315042573, "f1": 86.14927876930865}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 41.833797600120306, "cuda_eval_elapsed_time": 34.458772911071776}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l20-dl1--2021-01-24--15-45-27/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 43464610, "linear_total": 84934656, "linear_nnz": 19544320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1306112, "linear_attention_total": 2359296, "linear_attention_nnz": 484864, "linear_dense_total": 4718592, "linear_dense_nnz": 821248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1851648, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1247488}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616576, "linear_attention_total": 2359296, "linear_attention_nnz": 813312, "linear_dense_total": 4718592, "linear_dense_nnz": 1803264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2758400, "linear_attention_total": 2359296, "linear_attention_nnz": 1050880, "linear_dense_total": 4718592, "linear_dense_nnz": 1707520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 1007104, "linear_dense_total": 4718592, "linear_dense_nnz": 1634816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2344192, "linear_attention_total": 2359296, "linear_attention_nnz": 769792, "linear_dense_total": 4718592, "linear_dense_nnz": 1574400}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943552, "linear_attention_total": 2359296, "linear_attention_nnz": 749056, "linear_dense_total": 4718592, "linear_dense_nnz": 1194496}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626880, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 861440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 965376, "linear_attention_total": 2359296, "linear_attention_nnz": 645888, "linear_dense_total": 4718592, "linear_dense_nnz": 319488}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514048, "linear_attention_total": 2359296, "linear_attention_nnz": 368128, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544000, "linear_attention_total": 2359296, "linear_attention_nnz": 343296, "linear_dense_total": 4718592, "linear_dense_nnz": 200704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 215296, "linear_dense_total": 4718592, "linear_dense_nnz": 216320}}, "total_sparsity": 60.08509660099393, "linear_sparsity": 76.98899257330247}, "speed": {"eval_elapsed_time": 28.310240568593144, "cuda_eval_elapsed_time": 21.08596396636963}, "opt_eval_metrics": {"exact_match": 78.62819299905392, "f1": 86.57822332702295}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 34069864, "linear_total": 84934656, "linear_nnz": 10163200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 674816, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1137664, "linear_attention_total": 2359296, "linear_attention_nnz": 937984, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1458176, "linear_attention_total": 2359296, "linear_attention_nnz": 1193984, "linear_dense_total": 4718592, "linear_dense_nnz": 264192}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1335808, "linear_attention_total": 2359296, "linear_attention_nnz": 1057792, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 843264, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 948736, "linear_attention_total": 2359296, "linear_attention_nnz": 759808, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 979456, "linear_attention_total": 2359296, "linear_attention_nnz": 830464, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833536, "linear_attention_total": 2359296, "linear_attention_nnz": 753664, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 478208, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 46080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 432128, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 36864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290816, "linear_attention_total": 2359296, "linear_attention_nnz": 238592, "linear_dense_total": 4718592, "linear_dense_nnz": 52224}}, "total_sparsity": 68.71258409134985, "linear_sparsity": 88.03409529320987}, "speed": {"eval_elapsed_time": 21.962527931667864, "cuda_eval_elapsed_time": 14.846498733520509}, "opt_eval_metrics": {"exact_match": 76.9914853358562, "f1": 85.26341062121247}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 62877338, "linear_total": 84934656, "linear_nnz": 38938240, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3827456, "linear_attention_total": 2359296, "linear_attention_nnz": 326336, "linear_dense_total": 4718592, "linear_dense_nnz": 3501120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4141120, "linear_attention_total": 2359296, "linear_attention_nnz": 487552, "linear_dense_total": 4718592, "linear_dense_nnz": 3653568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4289088, "linear_attention_total": 2359296, "linear_attention_nnz": 487616, "linear_dense_total": 4718592, "linear_dense_nnz": 3801472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4512896, "linear_attention_total": 2359296, "linear_attention_nnz": 712832, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4390144, "linear_attention_total": 2359296, "linear_attention_nnz": 646272, "linear_dense_total": 4718592, "linear_dense_nnz": 3743872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4316928, "linear_attention_total": 2359296, "linear_attention_nnz": 625600, "linear_dense_total": 4718592, "linear_dense_nnz": 3691328}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4036864, "linear_attention_total": 2359296, "linear_attention_nnz": 575808, "linear_dense_total": 4718592, "linear_dense_nnz": 3461056}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592320, "linear_attention_total": 2359296, "linear_attention_nnz": 579392, "linear_dense_total": 4718592, "linear_dense_nnz": 3012928}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2753408, "linear_attention_total": 2359296, "linear_attention_nnz": 405632, "linear_dense_total": 4718592, "linear_dense_nnz": 2347776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1318784, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 1001344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 994816, "linear_attention_total": 2359296, "linear_attention_nnz": 238208, "linear_dense_total": 4718592, "linear_dense_nnz": 756608}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764416, "linear_attention_total": 2359296, "linear_attention_nnz": 141568, "linear_dense_total": 4718592, "linear_dense_nnz": 622848}}, "total_sparsity": 42.257784614732465, "linear_sparsity": 54.1550624517747}, "speed": {"eval_elapsed_time": 36.22357800696045, "cuda_eval_elapsed_time": 28.86345721435547}, "opt_eval_metrics": {"exact_match": 80.05676442762535, "f1": 87.66615713942541}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l10-dl1--2021-01-24--15-45-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 54326914, "linear_total": 84934656, "linear_nnz": 30395392, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2378496, "linear_attention_total": 2359296, "linear_attention_nnz": 720896, "linear_dense_total": 4718592, "linear_dense_nnz": 1657600}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2766336, "linear_attention_total": 2359296, "linear_attention_nnz": 719872, "linear_dense_total": 4718592, "linear_dense_nnz": 2046464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3780096, "linear_attention_total": 2359296, "linear_attention_nnz": 1058304, "linear_dense_total": 4718592, "linear_dense_nnz": 2721792}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3934976, "linear_attention_total": 2359296, "linear_attention_nnz": 1227776, "linear_dense_total": 4718592, "linear_dense_nnz": 2707200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4157696, "linear_attention_total": 2359296, "linear_attention_nnz": 1367808, "linear_dense_total": 4718592, "linear_dense_nnz": 2789888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3930624, "linear_attention_total": 2359296, "linear_attention_nnz": 1258240, "linear_dense_total": 4718592, "linear_dense_nnz": 2672384}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3266560, "linear_attention_total": 2359296, "linear_attention_nnz": 1130496, "linear_dense_total": 4718592, "linear_dense_nnz": 2136064}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2480128, "linear_attention_total": 2359296, "linear_attention_nnz": 988928, "linear_dense_total": 4718592, "linear_dense_nnz": 1491200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1542144, "linear_attention_total": 2359296, "linear_attention_nnz": 888576, "linear_dense_total": 4718592, "linear_dense_nnz": 653568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 816384, "linear_attention_total": 2359296, "linear_attention_nnz": 567296, "linear_dense_total": 4718592, "linear_dense_nnz": 249088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 722688, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 272128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619264, "linear_attention_total": 2359296, "linear_attention_nnz": 307456, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 50.10990494850615, "linear_sparsity": 64.21320408950618}, "speed": {"eval_elapsed_time": 33.56822411296889, "cuda_eval_elapsed_time": 26.317300163269042}, "opt_eval_metrics": {"exact_match": 80.00946073793756, "f1": 87.65780769915727}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42128141, "linear_total": 84934656, "linear_nnz": 18215424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277440, "linear_attention_total": 2359296, "linear_attention_nnz": 643072, "linear_dense_total": 4718592, "linear_dense_nnz": 634368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539584, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068480, "linear_attention_total": 2359296, "linear_attention_nnz": 1051648, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2334208, "linear_attention_total": 2359296, "linear_attention_nnz": 1257472, "linear_dense_total": 4718592, "linear_dense_nnz": 1076736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2473984, "linear_attention_total": 2359296, "linear_attention_nnz": 1315840, "linear_dense_total": 4718592, "linear_dense_nnz": 1158144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2078208, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1820160, "linear_attention_total": 2359296, "linear_attention_nnz": 1004544, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1555456, "linear_attention_total": 2359296, "linear_attention_nnz": 925696, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 663040, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576000, "linear_attention_total": 2359296, "linear_attention_nnz": 463872, "linear_dense_total": 4718592, "linear_dense_nnz": 112128}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}}, "total_sparsity": 61.31241765669342, "linear_sparsity": 78.55360243055556}, "speed": {"eval_elapsed_time": 24.523588876239955, "cuda_eval_elapsed_time": 17.326403350830077}, "opt_eval_metrics": {"exact_match": 79.66887417218543, "f1": 87.3881230572442}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39533983, "linear_total": 84934656, "linear_nnz": 15622656, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107968, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 720896, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717760, "linear_attention_total": 2359296, "linear_attention_nnz": 1098752, "linear_dense_total": 4718592, "linear_dense_nnz": 619008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967104, "linear_attention_total": 2359296, "linear_attention_nnz": 1309696, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2067968, "linear_attention_total": 2359296, "linear_attention_nnz": 1362944, "linear_dense_total": 4718592, "linear_dense_nnz": 705024}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742336, "linear_attention_total": 2359296, "linear_attention_nnz": 1074176, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565696, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 516096}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1342464, "linear_attention_total": 2359296, "linear_attention_nnz": 958464, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1153536, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 204288}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729088, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 551936, "linear_attention_total": 2359296, "linear_attention_nnz": 478208, "linear_dense_total": 4718592, "linear_dense_nnz": 73728}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 159744}}, "total_sparsity": 63.694713643514845, "linear_sparsity": 81.6062644675926}, "speed": {"eval_elapsed_time": 24.182081679347903, "cuda_eval_elapsed_time": 17.057066314697266}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.14755939306319}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42626625, "linear_total": 84934656, "linear_nnz": 18712064, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1420800, "linear_attention_total": 2359296, "linear_attention_nnz": 1210368, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1381888, "linear_attention_total": 2359296, "linear_attention_nnz": 977920, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013184, "linear_attention_total": 2359296, "linear_attention_nnz": 1500160, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1526784, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395136, "linear_attention_total": 2359296, "linear_attention_nnz": 1734656, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211328, "linear_attention_total": 2359296, "linear_attention_nnz": 1659904, "linear_dense_total": 4718592, "linear_dense_nnz": 551424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943040, "linear_attention_total": 2359296, "linear_attention_nnz": 1486848, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590784, "linear_attention_total": 2359296, "linear_attention_nnz": 1254400, "linear_dense_total": 4718592, "linear_dense_nnz": 336384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1441280, "linear_attention_total": 2359296, "linear_attention_nnz": 1267712, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 837632, "linear_attention_total": 2359296, "linear_attention_nnz": 760832, "linear_dense_total": 4718592, "linear_dense_nnz": 76800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 712704, "linear_dense_total": 4718592, "linear_dense_nnz": 69120}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 580096, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 136704}}, "total_sparsity": 60.85464429335368, "linear_sparsity": 77.96887056327161}, "speed": {"eval_elapsed_time": 26.6199238197878, "cuda_eval_elapsed_time": 19.459814723968506}, "opt_eval_metrics": {"exact_match": 80.10406811731315, "f1": 87.56487698206614}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 34.25446852017194, "cuda_eval_elapsed_time": 26.916674156188964}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l10-dl1--2021-01-24--15-47-42/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 54106194, "linear_total": 84934656, "linear_nnz": 30161424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2938272, "linear_attention_total": 2359296, "linear_attention_nnz": 528912, "linear_dense_total": 4718592, "linear_dense_nnz": 2409360}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3152560, "linear_attention_total": 2359296, "linear_attention_nnz": 618448, "linear_dense_total": 4718592, "linear_dense_nnz": 2534112}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3506608, "linear_attention_total": 2359296, "linear_attention_nnz": 835904, "linear_dense_total": 4718592, "linear_dense_nnz": 2670704}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3629200, "linear_attention_total": 2359296, "linear_attention_nnz": 958400, "linear_dense_total": 4718592, "linear_dense_nnz": 2670800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3711680, "linear_attention_total": 2359296, "linear_attention_nnz": 1091248, "linear_dense_total": 4718592, "linear_dense_nnz": 2620432}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3565952, "linear_attention_total": 2359296, "linear_attention_nnz": 1029984, "linear_dense_total": 4718592, "linear_dense_nnz": 2535968}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251504, "linear_attention_total": 2359296, "linear_attention_nnz": 964544, "linear_dense_total": 4718592, "linear_dense_nnz": 2286960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2541040, "linear_attention_total": 2359296, "linear_attention_nnz": 813552, "linear_dense_total": 4718592, "linear_dense_nnz": 1727488}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841104, "linear_attention_total": 2359296, "linear_attention_nnz": 744336, "linear_dense_total": 4718592, "linear_dense_nnz": 1096768}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 860464, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 386800}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666832, "linear_attention_total": 2359296, "linear_attention_nnz": 357616, "linear_dense_total": 4718592, "linear_dense_nnz": 309216}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496208, "linear_attention_total": 2359296, "linear_attention_nnz": 219536, "linear_dense_total": 4718592, "linear_dense_nnz": 276672}}, "total_sparsity": 50.31259899035372, "linear_sparsity": 64.48867232711225}, "speed": {"eval_elapsed_time": 40.718972705770284, "cuda_eval_elapsed_time": 33.44704815673828}, "opt_eval_metrics": {"exact_match": 80.9271523178808, "f1": 88.21768668110452}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 38.88521202793345, "cuda_eval_elapsed_time": 31.47156787109375}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 24.593657957855612, "cuda_eval_elapsed_time": 17.41685264968872}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 24.35187277989462, "cuda_eval_elapsed_time": 17.205588718414308}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 22.9045692961663, "cuda_eval_elapsed_time": 15.710145233154297}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 22.627552575897425, "cuda_eval_elapsed_time": 15.454076248168946}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 24.343701715115458, "cuda_eval_elapsed_time": 17.153405197143556}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 22.581966675817966, "cuda_eval_elapsed_time": 15.435867366790772}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 22.516427854076028, "cuda_eval_elapsed_time": 15.359982524871826}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 22.226274209097028, "cuda_eval_elapsed_time": 15.051653835296632}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 26.613120706751943, "cuda_eval_elapsed_time": 19.37784966278076}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 35.06419681990519, "cuda_eval_elapsed_time": 27.713626304626466}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 19.25184288667515, "cuda_eval_elapsed_time": 12.066676223754882}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 40.38167083170265, "cuda_eval_elapsed_time": 32.98558323669434}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 40.42444095481187, "cuda_eval_elapsed_time": 33.06226232147217}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 37.12324417894706, "cuda_eval_elapsed_time": 29.783737594604492}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 34.635603360366076, "cuda_eval_elapsed_time": 27.293812591552737}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 34.61669071530923, "cuda_eval_elapsed_time": 27.2810027923584}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 34.5833341376856, "cuda_eval_elapsed_time": 27.25869842529297}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 24.01387820020318, "cuda_eval_elapsed_time": 16.872496753692626}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 24.0219326200895, "cuda_eval_elapsed_time": 16.85802384185791}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 18.124292518012226, "cuda_eval_elapsed_time": 10.910909118652343}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 17.95050869276747, "cuda_eval_elapsed_time": 10.839029987335206}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 21.342612544074655, "cuda_eval_elapsed_time": 14.216132064819336}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 25.600778602063656, "cuda_eval_elapsed_time": 18.375184078216552}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 25.450434973929077, "cuda_eval_elapsed_time": 18.270113506317138}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": {"stats": {"total": 108893186, "nnz": 107798786, "linear_total": 84934656, "linear_nnz": 83840256, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6865920, "linear_attention_total": 2359296, "linear_attention_nnz": 2151936, "linear_dense_total": 4718592, "linear_dense_nnz": 4713984}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7009024, "linear_attention_total": 2359296, "linear_attention_nnz": 2299648, "linear_dense_total": 4718592, "linear_dense_nnz": 4709376}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7037952, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4707840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7041024, "linear_attention_total": 2359296, "linear_attention_nnz": 2330112, "linear_dense_total": 4718592, "linear_dense_nnz": 4710912}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7029760, "linear_attention_total": 2359296, "linear_attention_nnz": 2324992, "linear_dense_total": 4718592, "linear_dense_nnz": 4704768}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7043584, "linear_attention_total": 2359296, "linear_attention_nnz": 2337280, "linear_dense_total": 4718592, "linear_dense_nnz": 4706304}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7006464, "linear_attention_total": 2359296, "linear_attention_nnz": 2321664, "linear_dense_total": 4718592, "linear_dense_nnz": 4684800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7025664, "linear_attention_total": 2359296, "linear_attention_nnz": 2342400, "linear_dense_total": 4718592, "linear_dense_nnz": 4683264}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6950656, "linear_attention_total": 2359296, "linear_attention_nnz": 2296576, "linear_dense_total": 4718592, "linear_dense_nnz": 4654080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6905600, "linear_attention_total": 2359296, "linear_attention_nnz": 2259200, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6924288, "linear_attention_total": 2359296, "linear_attention_nnz": 2285568, "linear_dense_total": 4718592, "linear_dense_nnz": 4638720}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7000320, "linear_attention_total": 2359296, "linear_attention_nnz": 2312448, "linear_dense_total": 4718592, "linear_dense_nnz": 4687872}}, "total_sparsity": 1.005021563057218, "linear_sparsity": 1.288519965277779}, "speed": {"eval_elapsed_time": 47.70582241564989, "cuda_eval_elapsed_time": 40.403957000732426}, "opt_eval_metrics": {"exact_match": 80.90823084200568, "f1": 88.13888839423888}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": {"stats": {"total": 108893186, "nnz": 104455426, "linear_total": 84934656, "linear_nnz": 80496896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6593280, "linear_attention_total": 2359296, "linear_attention_nnz": 1914624, "linear_dense_total": 4718592, "linear_dense_nnz": 4678656}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6772736, "linear_attention_total": 2359296, "linear_attention_nnz": 2103296, "linear_dense_total": 4718592, "linear_dense_nnz": 4669440}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6885632, "linear_attention_total": 2359296, "linear_attention_nnz": 2239232, "linear_dense_total": 4718592, "linear_dense_nnz": 4646400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6876672, "linear_attention_total": 2359296, "linear_attention_nnz": 2219520, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6873600, "linear_attention_total": 2359296, "linear_attention_nnz": 2216448, "linear_dense_total": 4718592, "linear_dense_nnz": 4657152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6841856, "linear_attention_total": 2359296, "linear_attention_nnz": 2226176, "linear_dense_total": 4718592, "linear_dense_nnz": 4615680}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6786560, "linear_attention_total": 2359296, "linear_attention_nnz": 2190848, "linear_dense_total": 4718592, "linear_dense_nnz": 4595712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6811392, "linear_attention_total": 2359296, "linear_attention_nnz": 2261760, "linear_dense_total": 4718592, "linear_dense_nnz": 4549632}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6609408, "linear_attention_total": 2359296, "linear_attention_nnz": 2178048, "linear_dense_total": 4718592, "linear_dense_nnz": 4431360}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6399744, "linear_attention_total": 2359296, "linear_attention_nnz": 2049792, "linear_dense_total": 4718592, "linear_dense_nnz": 4349952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6406656, "linear_attention_total": 2359296, "linear_attention_nnz": 2053632, "linear_dense_total": 4718592, "linear_dense_nnz": 4353024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6639360, "linear_attention_total": 2359296, "linear_attention_nnz": 2100480, "linear_dense_total": 4718592, "linear_dense_nnz": 4538880}}, "total_sparsity": 4.075333051601593, "linear_sparsity": 5.224910783179015}, "speed": {"eval_elapsed_time": 47.13309640903026, "cuda_eval_elapsed_time": 39.830447120666506}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.2154189083501}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 32.61186430603266, "cuda_eval_elapsed_time": 25.313612380981446}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 32.03074289299548, "cuda_eval_elapsed_time": 24.748493873596193}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 31.406295038294047, "cuda_eval_elapsed_time": 24.124949531555178}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 28.903804030269384, "cuda_eval_elapsed_time": 21.577418830871583}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 28.641465611290187, "cuda_eval_elapsed_time": 21.28239717102051}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 27.97377561684698, "cuda_eval_elapsed_time": 20.68525614929199}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 22.965831307694316, "cuda_eval_elapsed_time": 15.796802043914795}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 22.811819266993552, "cuda_eval_elapsed_time": 15.618790004730226}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": {"stats": {"total": 108893186, "nnz": 108881410, "linear_total": 84934656, "linear_nnz": 84922880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7072768, "linear_attention_total": 2359296, "linear_attention_nnz": 2354176, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076352, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4717056}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7076864, "linear_attention_total": 2359296, "linear_attention_nnz": 2358272, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7077888, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4718592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 7074816, "linear_attention_total": 2359296, "linear_attention_nnz": 2359296, "linear_dense_total": 4718592, "linear_dense_nnz": 4715520}}, "total_sparsity": 0.010814267111258768, "linear_sparsity": 0.013864776234573384}, "speed": {"eval_elapsed_time": 46.91258597606793, "cuda_eval_elapsed_time": 39.58176746368408}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.6699349353281}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 25.082025394309312, "cuda_eval_elapsed_time": 17.906531421661377}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 24.917593302205205, "cuda_eval_elapsed_time": 17.75180138397217}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 24.480814102105796, "cuda_eval_elapsed_time": 17.306304389953613}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 24.51584801170975, "cuda_eval_elapsed_time": 17.32754041290283}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 32.04050999786705, "cuda_eval_elapsed_time": 24.64119245147705}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 81807426, "linear_total": 84934656, "linear_nnz": 57862144, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5237760, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 4316160}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5140480, "linear_attention_total": 2359296, "linear_attention_nnz": 829440, "linear_dense_total": 4718592, "linear_dense_nnz": 4311040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5641216, "linear_attention_total": 2359296, "linear_attention_nnz": 1221632, "linear_dense_total": 4718592, "linear_dense_nnz": 4419584}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5816320, "linear_attention_total": 2359296, "linear_attention_nnz": 1386496, "linear_dense_total": 4718592, "linear_dense_nnz": 4429824}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5997568, "linear_attention_total": 2359296, "linear_attention_nnz": 1540096, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5968896, "linear_attention_total": 2359296, "linear_attention_nnz": 1548288, "linear_dense_total": 4718592, "linear_dense_nnz": 4420608}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5685248, "linear_attention_total": 2359296, "linear_attention_nnz": 1364992, "linear_dense_total": 4718592, "linear_dense_nnz": 4320256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5458944, "linear_attention_total": 2359296, "linear_attention_nnz": 1272832, "linear_dense_total": 4718592, "linear_dense_nnz": 4186112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 1173504, "linear_dense_total": 4718592, "linear_dense_nnz": 3787776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3566592, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 2839552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 671744, "linear_dense_total": 4718592, "linear_dense_nnz": 2001920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1714176, "linear_attention_total": 2359296, "linear_attention_nnz": 409600, "linear_dense_total": 4718592, "linear_dense_nnz": 1304576}}, "total_sparsity": 24.873695953757846, "linear_sparsity": 31.87451774691358}, "speed": {"eval_elapsed_time": 39.2419764213264, "cuda_eval_elapsed_time": 31.817585739135744}, "opt_eval_metrics": {"exact_match": 81.47587511825922, "f1": 88.73698799207777}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 81295202, "linear_total": 84934656, "linear_nnz": 57351168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5262336, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4457472}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5235712, "linear_attention_total": 2359296, "linear_attention_nnz": 771072, "linear_dense_total": 4718592, "linear_dense_nnz": 4464640}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5669888, "linear_attention_total": 2359296, "linear_attention_nnz": 1152000, "linear_dense_total": 4718592, "linear_dense_nnz": 4517888}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5875712, "linear_attention_total": 2359296, "linear_attention_nnz": 1312768, "linear_dense_total": 4718592, "linear_dense_nnz": 4562944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6056960, "linear_attention_total": 2359296, "linear_attention_nnz": 1501184, "linear_dense_total": 4718592, "linear_dense_nnz": 4555776}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5898240, "linear_attention_total": 2359296, "linear_attention_nnz": 1377280, "linear_dense_total": 4718592, "linear_dense_nnz": 4520960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5831680, "linear_attention_total": 2359296, "linear_attention_nnz": 1357824, "linear_dense_total": 4718592, "linear_dense_nnz": 4473856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5483520, "linear_attention_total": 2359296, "linear_attention_nnz": 1192960, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4958208, "linear_attention_total": 2359296, "linear_attention_nnz": 1069056, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3236864, "linear_attention_total": 2359296, "linear_attention_nnz": 718848, "linear_dense_total": 4718592, "linear_dense_nnz": 2518016}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2222080, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1607680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1619968, "linear_attention_total": 2359296, "linear_attention_nnz": 389120, "linear_dense_total": 4718592, "linear_dense_nnz": 1230848}}, "total_sparsity": 25.344087186502197, "linear_sparsity": 32.47612847222222}, "speed": {"eval_elapsed_time": 38.71227815328166, "cuda_eval_elapsed_time": 31.30978426361084}, "opt_eval_metrics": {"exact_match": 81.51371807000946, "f1": 88.67903677006836}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 24.569451212882996, "cuda_eval_elapsed_time": 17.401466148376464}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 19.843479705043137, "cuda_eval_elapsed_time": 12.72295495223999}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 19.77598567586392, "cuda_eval_elapsed_time": 12.594031677246093}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 17.89066500775516, "cuda_eval_elapsed_time": 10.789498657226563}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 17.745041345246136, "cuda_eval_elapsed_time": 10.616429515838623}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 17.720320571679622, "cuda_eval_elapsed_time": 10.602042137145997}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 17.679683603346348, "cuda_eval_elapsed_time": 10.591327346801759}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 21.232633945997804, "cuda_eval_elapsed_time": 14.06190474319458}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 20.70654077688232, "cuda_eval_elapsed_time": 13.55481234741211}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 20.617251713294536, "cuda_eval_elapsed_time": 13.45258574295044}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 20.267827302217484, "cuda_eval_elapsed_time": 13.135342720031739}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 32.748252402991056, "cuda_eval_elapsed_time": 25.440285942077637}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 20.2730004908517, "cuda_eval_elapsed_time": 13.145666324615478}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 19.889838815666735, "cuda_eval_elapsed_time": 12.730052349090576}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 19.804423895198852, "cuda_eval_elapsed_time": 12.679624610900879}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 19.829505565110594, "cuda_eval_elapsed_time": 12.68363911819458}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 19.654158322140574, "cuda_eval_elapsed_time": 12.51708829498291}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 25.53750513214618, "cuda_eval_elapsed_time": 18.277880432128907}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 25.119796799961478, "cuda_eval_elapsed_time": 17.92396342086792}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 24.82955563813448, "cuda_eval_elapsed_time": 17.63341423416138}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 24.740368818864226, "cuda_eval_elapsed_time": 17.564620765686037}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 29.121937923133373, "cuda_eval_elapsed_time": 21.874919250488283}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 20.942012635990977, "cuda_eval_elapsed_time": 13.628461513519287}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 26.92565976222977, "cuda_eval_elapsed_time": 19.635457836151122}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 26.45731420116499, "cuda_eval_elapsed_time": 19.24458102798462}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 32.96429116372019, "cuda_eval_elapsed_time": 25.61453672027588}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 32.05209435708821, "cuda_eval_elapsed_time": 24.736273986816407}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 31.986000607255846, "cuda_eval_elapsed_time": 24.675214500427245}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 21.38672148110345, "cuda_eval_elapsed_time": 14.27365227508545}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 21.39737162971869, "cuda_eval_elapsed_time": 14.245073196411132}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 33.8130349079147, "cuda_eval_elapsed_time": 26.4900548248291}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 42.719326278194785, "cuda_eval_elapsed_time": 35.30916271209717}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 42.675803440622985, "cuda_eval_elapsed_time": 35.31425653076172}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 44.979358388110995, "cuda_eval_elapsed_time": 37.63941863250732}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 18.694298257119954, "cuda_eval_elapsed_time": 11.534610126495362}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 18.590640037320554, "cuda_eval_elapsed_time": 11.486401634216309}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 18.49867358384654, "cuda_eval_elapsed_time": 11.375749713897706}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 20.705443068873137, "cuda_eval_elapsed_time": 13.584790561676026}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 21.616635580081493, "cuda_eval_elapsed_time": 14.452293647766114}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 20.8758007818833, "cuda_eval_elapsed_time": 13.721765300750732}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 20.88254290400073, "cuda_eval_elapsed_time": 13.714137535095215}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 20.651509277056903, "cuda_eval_elapsed_time": 13.485522186279297}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000": {"stats": {"total": 334094338, "nnz": 88857851, "linear_total": 301989888, "linear_nnz": 56885248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1618944, "linear_attention_total": 4194304, "linear_attention_nnz": 783360, "linear_dense_total": 8388608, "linear_dense_nnz": 835584}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1602560, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 1275904}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1665024, "linear_attention_total": 4194304, "linear_attention_nnz": 305152, "linear_dense_total": 8388608, "linear_dense_nnz": 1359872}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2312192, "linear_attention_total": 4194304, "linear_attention_nnz": 626688, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2137088, "linear_attention_total": 4194304, "linear_attention_nnz": 369664, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2337792, "linear_attention_total": 4194304, "linear_attention_nnz": 463872, "linear_dense_total": 8388608, "linear_dense_nnz": 1873920}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2349056, "linear_attention_total": 4194304, "linear_attention_nnz": 294912, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2386944, "linear_attention_total": 4194304, "linear_attention_nnz": 613376, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2177024, "linear_attention_total": 4194304, "linear_attention_nnz": 208896, "linear_dense_total": 8388608, "linear_dense_nnz": 1968128}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2910208, "linear_attention_total": 4194304, "linear_attention_nnz": 923648, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4046848, "linear_attention_total": 4194304, "linear_attention_nnz": 1636352, "linear_dense_total": 8388608, "linear_dense_nnz": 2410496}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4086784, "linear_attention_total": 4194304, "linear_attention_nnz": 1575936, "linear_dense_total": 8388608, "linear_dense_nnz": 2510848}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3863552, "linear_attention_total": 4194304, "linear_attention_nnz": 1203200, "linear_dense_total": 8388608, "linear_dense_nnz": 2660352}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4635648, "linear_attention_total": 4194304, "linear_attention_nnz": 2030592, "linear_dense_total": 8388608, "linear_dense_nnz": 2605056}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4085760, "linear_attention_total": 4194304, "linear_attention_nnz": 1785856, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3646464, "linear_attention_total": 4194304, "linear_attention_nnz": 1946624, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3050496, "linear_attention_total": 4194304, "linear_attention_nnz": 1647616, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2635776, "linear_attention_total": 4194304, "linear_attention_nnz": 1538048, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2070528, "linear_attention_total": 4194304, "linear_attention_nnz": 1169408, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1346560, "linear_attention_total": 4194304, "linear_attention_nnz": 607232, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 754688, "linear_attention_total": 4194304, "linear_attention_nnz": 396288, "linear_dense_total": 8388608, "linear_dense_nnz": 358400}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 479232, "linear_attention_total": 4194304, "linear_attention_nnz": 284672, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 250880, "linear_attention_total": 4194304, "linear_attention_nnz": 70656, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 435200, "linear_attention_total": 4194304, "linear_attention_nnz": 111616, "linear_dense_total": 8388608, "linear_dense_nnz": 323584}}, "total_sparsity": 73.40336518962498, "linear_sparsity": 81.16319444444444}, "speed": {"eval_elapsed_time": 49.08256564009935, "cuda_eval_elapsed_time": 41.496326583862306}, "opt_eval_metrics": {"exact_match": 83.78429517502366, "f1": 90.32458147221426}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-221320": {"stats": {"total": 334094338, "nnz": 88581359, "linear_total": 301989888, "linear_nnz": 56608768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1598464, "linear_attention_total": 4194304, "linear_attention_nnz": 766976, "linear_dense_total": 8388608, "linear_dense_nnz": 831488}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1612800, "linear_attention_total": 4194304, "linear_attention_nnz": 338944, "linear_dense_total": 8388608, "linear_dense_nnz": 1273856}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1662976, "linear_attention_total": 4194304, "linear_attention_nnz": 305152, "linear_dense_total": 8388608, "linear_dense_nnz": 1357824}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2307072, "linear_attention_total": 4194304, "linear_attention_nnz": 621568, "linear_dense_total": 8388608, "linear_dense_nnz": 1685504}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2145280, "linear_attention_total": 4194304, "linear_attention_nnz": 377856, "linear_dense_total": 8388608, "linear_dense_nnz": 1767424}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2332672, "linear_attention_total": 4194304, "linear_attention_nnz": 460800, "linear_dense_total": 8388608, "linear_dense_nnz": 1871872}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2363392, "linear_attention_total": 4194304, "linear_attention_nnz": 309248, "linear_dense_total": 8388608, "linear_dense_nnz": 2054144}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2357248, "linear_attention_total": 4194304, "linear_attention_nnz": 583680, "linear_dense_total": 8388608, "linear_dense_nnz": 1773568}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2181120, "linear_attention_total": 4194304, "linear_attention_nnz": 215040, "linear_dense_total": 8388608, "linear_dense_nnz": 1966080}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2903040, "linear_attention_total": 4194304, "linear_attention_nnz": 916480, "linear_dense_total": 8388608, "linear_dense_nnz": 1986560}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4004864, "linear_attention_total": 4194304, "linear_attention_nnz": 1596416, "linear_dense_total": 8388608, "linear_dense_nnz": 2408448}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4124672, "linear_attention_total": 4194304, "linear_attention_nnz": 1615872, "linear_dense_total": 8388608, "linear_dense_nnz": 2508800}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3863552, "linear_attention_total": 4194304, "linear_attention_nnz": 1205248, "linear_dense_total": 8388608, "linear_dense_nnz": 2658304}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4609024, "linear_attention_total": 4194304, "linear_attention_nnz": 2006016, "linear_dense_total": 8388608, "linear_dense_nnz": 2603008}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 4018176, "linear_attention_total": 4194304, "linear_attention_nnz": 1718272, "linear_dense_total": 8388608, "linear_dense_nnz": 2299904}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3635200, "linear_attention_total": 4194304, "linear_attention_nnz": 1935360, "linear_dense_total": 8388608, "linear_dense_nnz": 1699840}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3015680, "linear_attention_total": 4194304, "linear_attention_nnz": 1612800, "linear_dense_total": 8388608, "linear_dense_nnz": 1402880}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2599936, "linear_attention_total": 4194304, "linear_attention_nnz": 1502208, "linear_dense_total": 8388608, "linear_dense_nnz": 1097728}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2068480, "linear_attention_total": 4194304, "linear_attention_nnz": 1167360, "linear_dense_total": 8388608, "linear_dense_nnz": 901120}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1340416, "linear_attention_total": 4194304, "linear_attention_nnz": 601088, "linear_dense_total": 8388608, "linear_dense_nnz": 739328}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 720896, "linear_attention_total": 4194304, "linear_attention_nnz": 364544, "linear_dense_total": 8388608, "linear_dense_nnz": 356352}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 468992, "linear_attention_total": 4194304, "linear_attention_nnz": 274432, "linear_dense_total": 8388608, "linear_dense_nnz": 194560}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 250880, "linear_attention_total": 4194304, "linear_attention_nnz": 70656, "linear_dense_total": 8388608, "linear_dense_nnz": 180224}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 423936, "linear_attention_total": 4194304, "linear_attention_nnz": 102400, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}}, "total_sparsity": 73.4861238504437, "linear_sparsity": 81.25474717881944}, "speed": {"eval_elapsed_time": 48.98561626393348, "cuda_eval_elapsed_time": 41.275371505737304}, "opt_eval_metrics": {"exact_match": 83.66130558183538, "f1": 90.22195941338013}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": {"stats": {"total": 334094338, "nnz": 68649433, "linear_total": 301989888, "linear_nnz": 36684800, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1167360, "linear_attention_total": 4194304, "linear_attention_nnz": 974848, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 576512, "linear_attention_total": 4194304, "linear_attention_nnz": 306176, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 584704, "linear_attention_total": 4194304, "linear_attention_nnz": 297984, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1248256, "linear_attention_total": 4194304, "linear_attention_nnz": 834560, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 848896, "linear_attention_total": 4194304, "linear_attention_nnz": 381952, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 959488, "linear_attention_total": 4194304, "linear_attention_nnz": 406528, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1130496, "linear_attention_total": 4194304, "linear_attention_nnz": 522240, "linear_dense_total": 8388608, "linear_dense_nnz": 608256}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 771072, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 414720, "linear_dense_total": 8388608, "linear_dense_nnz": 661504}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1839104, "linear_attention_total": 4194304, "linear_attention_nnz": 1091584, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2709504, "linear_attention_total": 4194304, "linear_attention_nnz": 1714176, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2908160, "linear_attention_total": 4194304, "linear_attention_nnz": 1875968, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3074048, "linear_attention_total": 4194304, "linear_attention_nnz": 1832960, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3335168, "linear_attention_total": 4194304, "linear_attention_nnz": 2155520, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2851840, "linear_attention_total": 4194304, "linear_attention_nnz": 1942528, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2761728, "linear_attention_total": 4194304, "linear_attention_nnz": 2079744, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2316288, "linear_attention_total": 4194304, "linear_attention_nnz": 1843200, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1950720, "linear_attention_total": 4194304, "linear_attention_nnz": 1582080, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1757184, "linear_attention_total": 4194304, "linear_attention_nnz": 1435648, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 988160, "linear_attention_total": 4194304, "linear_attention_nnz": 717824, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 447488, "linear_attention_total": 4194304, "linear_attention_nnz": 334848, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 436224, "linear_attention_total": 4194304, "linear_attention_nnz": 358400, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 214016, "linear_attention_total": 4194304, "linear_attention_nnz": 134144, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 293888, "linear_attention_total": 4194304, "linear_attention_nnz": 111616, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.45208128609471, "linear_sparsity": 87.85230848524306}, "speed": {"eval_elapsed_time": 44.93039320781827, "cuda_eval_elapsed_time": 37.50764268493653}, "opt_eval_metrics": {"exact_match": 82.33680227057711, "f1": 89.04761607630476}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": {"stats": {"total": 334094338, "nnz": 68429014, "linear_total": 301989888, "linear_nnz": 36464640, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1181696, "linear_attention_total": 4194304, "linear_attention_nnz": 989184, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 593920, "linear_attention_total": 4194304, "linear_attention_nnz": 323584, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 573440, "linear_attention_total": 4194304, "linear_attention_nnz": 286720, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1215488, "linear_attention_total": 4194304, "linear_attention_nnz": 801792, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 863232, "linear_attention_total": 4194304, "linear_attention_nnz": 396288, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 405504, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1124352, "linear_attention_total": 4194304, "linear_attention_nnz": 520192, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1203200, "linear_attention_total": 4194304, "linear_attention_nnz": 764928, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1083392, "linear_attention_total": 4194304, "linear_attention_nnz": 423936, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1817600, "linear_attention_total": 4194304, "linear_attention_nnz": 1070080, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2741248, "linear_attention_total": 4194304, "linear_attention_nnz": 1745920, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2934784, "linear_attention_total": 4194304, "linear_attention_nnz": 1902592, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3023872, "linear_attention_total": 4194304, "linear_attention_nnz": 1782784, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3326976, "linear_attention_total": 4194304, "linear_attention_nnz": 2147328, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2827264, "linear_attention_total": 4194304, "linear_attention_nnz": 1917952, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2731008, "linear_attention_total": 4194304, "linear_attention_nnz": 2049024, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2293760, "linear_attention_total": 4194304, "linear_attention_nnz": 1820672, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1931264, "linear_attention_total": 4194304, "linear_attention_nnz": 1562624, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1712128, "linear_attention_total": 4194304, "linear_attention_nnz": 1390592, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.51805636406804, "linear_sparsity": 87.92521158854166}, "speed": {"eval_elapsed_time": 44.93571184715256, "cuda_eval_elapsed_time": 37.54432637023926}, "opt_eval_metrics": {"exact_match": 82.13812677388836, "f1": 89.03656646065757}}, "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": {"stats": {"total": 334094338, "nnz": 68456822, "linear_total": 301989888, "linear_nnz": 36492288, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1171456, "linear_attention_total": 4194304, "linear_attention_nnz": 978944, "linear_dense_total": 8388608, "linear_dense_nnz": 192512}, "1": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 589824, "linear_attention_total": 4194304, "linear_attention_nnz": 319488, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "2": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 580608, "linear_attention_total": 4194304, "linear_attention_nnz": 293888, "linear_dense_total": 8388608, "linear_dense_nnz": 286720}, "3": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1209344, "linear_attention_total": 4194304, "linear_attention_nnz": 795648, "linear_dense_total": 8388608, "linear_dense_nnz": 413696}, "4": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 862208, "linear_attention_total": 4194304, "linear_attention_nnz": 395264, "linear_dense_total": 8388608, "linear_dense_nnz": 466944}, "5": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 945152, "linear_attention_total": 4194304, "linear_attention_nnz": 392192, "linear_dense_total": 8388608, "linear_dense_nnz": 552960}, "6": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1127424, "linear_attention_total": 4194304, "linear_attention_nnz": 523264, "linear_dense_total": 8388608, "linear_dense_nnz": 604160}, "7": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1222656, "linear_attention_total": 4194304, "linear_attention_nnz": 784384, "linear_dense_total": 8388608, "linear_dense_nnz": 438272}, "8": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1076224, "linear_attention_total": 4194304, "linear_attention_nnz": 416768, "linear_dense_total": 8388608, "linear_dense_nnz": 659456}, "9": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1816576, "linear_attention_total": 4194304, "linear_attention_nnz": 1069056, "linear_dense_total": 8388608, "linear_dense_nnz": 747520}, "10": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2749440, "linear_attention_total": 4194304, "linear_attention_nnz": 1754112, "linear_dense_total": 8388608, "linear_dense_nnz": 995328}, "11": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2954240, "linear_attention_total": 4194304, "linear_attention_nnz": 1922048, "linear_dense_total": 8388608, "linear_dense_nnz": 1032192}, "12": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3016704, "linear_attention_total": 4194304, "linear_attention_nnz": 1775616, "linear_dense_total": 8388608, "linear_dense_nnz": 1241088}, "13": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 3329024, "linear_attention_total": 4194304, "linear_attention_nnz": 2149376, "linear_dense_total": 8388608, "linear_dense_nnz": 1179648}, "14": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2864128, "linear_attention_total": 4194304, "linear_attention_nnz": 1954816, "linear_dense_total": 8388608, "linear_dense_nnz": 909312}, "15": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2747392, "linear_attention_total": 4194304, "linear_attention_nnz": 2065408, "linear_dense_total": 8388608, "linear_dense_nnz": 681984}, "16": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 2296832, "linear_attention_total": 4194304, "linear_attention_nnz": 1823744, "linear_dense_total": 8388608, "linear_dense_nnz": 473088}, "17": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1927168, "linear_attention_total": 4194304, "linear_attention_nnz": 1558528, "linear_dense_total": 8388608, "linear_dense_nnz": 368640}, "18": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 1678336, "linear_attention_total": 4194304, "linear_attention_nnz": 1356800, "linear_dense_total": 8388608, "linear_dense_nnz": 321536}, "19": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 958464, "linear_attention_total": 4194304, "linear_attention_nnz": 688128, "linear_dense_total": 8388608, "linear_dense_nnz": 270336}, "20": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 439296, "linear_attention_total": 4194304, "linear_attention_nnz": 326656, "linear_dense_total": 8388608, "linear_dense_nnz": 112640}, "21": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 421888, "linear_attention_total": 4194304, "linear_attention_nnz": 344064, "linear_dense_total": 8388608, "linear_dense_nnz": 77824}, "22": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 208896, "linear_attention_total": 4194304, "linear_attention_nnz": 129024, "linear_dense_total": 8388608, "linear_dense_nnz": 79872}, "23": {"total": 1024, "nnz": 1024, "linear_total": 12582912, "linear_nnz": 299008, "linear_attention_total": 4194304, "linear_attention_nnz": 116736, "linear_dense_total": 8388608, "linear_dense_nnz": 182272}}, "total_sparsity": 79.50973296650122, "linear_sparsity": 87.91605631510416}, "speed": {"eval_elapsed_time": 44.935436787083745, "cuda_eval_elapsed_time": 37.53598588562012}, "opt_eval_metrics": {"exact_match": 82.30842005676443, "f1": 89.04987146464723}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 31.782106802333146, "cuda_eval_elapsed_time": 24.480328201293947}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 29.958857133984566, "cuda_eval_elapsed_time": 22.747020225524903}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 27.08285549096763, "cuda_eval_elapsed_time": 19.890604362487792}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 23.62707085069269, "cuda_eval_elapsed_time": 16.44181579208374}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 23.622337056789547, "cuda_eval_elapsed_time": 16.405798454284668}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}}, "base_speed_report": {"eval_elapsed_time": 45.63197132572532, "cuda_eval_elapsed_time": 38.594393005371096}} \ No newline at end of file diff --git a/analysis/files/results/results2.json b/analysis/files/results/results2.json deleted file mode 100644 index 380e4742..00000000 --- a/analysis/files/results/results2.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 24.538471670006402}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 24.512695128913037}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 23.01165157998912}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 22.739166334969923}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 24.61073462094646}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 22.93524650495965}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 22.778588181943633}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 22.55623343400657}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 22.34537693602033}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 26.683821239043027}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 21.138172222999856}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 26.352183306007646}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 25.076852509053424}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 24.88070543995127}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 24.467614763067104}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 24.449092374998145}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 18.056047238991596}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 17.975527490023524}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 17.995222621015273}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 21.68255266698543}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 20.976891906931996}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 19.15512446698267}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 21.517418827977963}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 20.808788317954168}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 36.71001935296226}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 27.453239777940325}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 27.093046838999726}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 20.880222300067544}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 18.93994551000651}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 23.66981486894656}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 23.6253067109501}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.67925895296503}} \ No newline at end of file diff --git a/analysis/files/results/results3.json b/analysis/files/results/results3.json deleted file mode 100644 index 8d82a4f5..00000000 --- a/analysis/files/results/results3.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.375705623999238}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.870226074010134}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.235010980977677}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 16.02295208198484}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.927992683951743}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 16.204893412068486}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 16.045786170987412}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.8394883510191}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.605650334036909}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 20.12763703102246}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.409963917918503}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.68077167298179}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.309701333986595}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.103150750976056}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.77731288096402}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.746100773918442}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.2053101999918}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.106899423059076}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.171043560025282}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 14.883674454060383}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.104866776964627}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 12.247032377053984}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.720699563040398}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.966550998971798}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 30.27796263305936}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 20.700779567006975}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.624390312936157}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.420848372974433}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.429447512025945}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.997670065960847}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.94853257900104}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results4.json b/analysis/files/results/results4.json deleted file mode 100644 index 73e94069..00000000 --- a/analysis/files/results/results4.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.375705623999238}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.870226074010134}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.235010980977677}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 16.02295208198484}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.927992683951743}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 16.204893412068486}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 16.045786170987412}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.8394883510191}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.605650334036909}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 20.12763703102246}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 69.66989313997328, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-45000": {"stats": {"total": 108893186, "nnz": 52655769, "linear_total": 84934656, "linear_nnz": 28740096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3704832, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 2131968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818560, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2327040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3674112, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2494464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592704, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2314752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2942976, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844672, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738752, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665024, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 878592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 402432}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1440768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 949248}}, "total_sparsity": 51.64456938563631, "linear_sparsity": 66.162109375}, "speed": {"eval_elapsed_time": 15.83343747886829}, "opt_eval_metrics": {"exact_match": 78.1929990539262, "f1": 85.92206431273945}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 47478801, "linear_total": 84934656, "linear_nnz": 23566848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2959872, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1485312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2121216, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1728000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3085824, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1906176}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1806336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422272, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1734144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1712640}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1288704}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494528, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 708096}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 772608}}, "total_sparsity": 56.39874013788153, "linear_sparsity": 72.2529658564815}, "speed": {"eval_elapsed_time": 14.195255008991808}, "opt_eval_metrics": {"exact_match": 77.69157994323557, "f1": 85.75507572992562}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.292132368078455}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 71.46695366402855, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-50000": {"stats": {"total": 108893186, "nnz": 40770050, "linear_total": 84934656, "linear_nnz": 16811520, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190336, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 855552}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2171904, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1900032, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717248, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1029120}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1319424, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 729600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030656, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 637440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1179648, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 393216}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 715776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 224256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}}, "total_sparsity": 62.55959486757969, "linear_sparsity": 80.20652488425925}, "speed": {"eval_elapsed_time": 12.685803183936514}, "opt_eval_metrics": {"exact_match": 75.79943235572375, "f1": 84.3797785815339}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.091999777941965}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.029475754010491}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.409963917918503}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 19.07873147400096}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.581735570915043}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-25000": {"stats": {"total": 108893186, "nnz": 97257474, "linear_total": 84934656, "linear_nnz": 73298944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6452736, "linear_attention_total": 2359296, "linear_attention_nnz": 1889280, "linear_dense_total": 4718592, "linear_dense_nnz": 4563456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6338048, "linear_attention_total": 2359296, "linear_attention_nnz": 1839104, "linear_dense_total": 4718592, "linear_dense_nnz": 4498944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6545920, "linear_attention_total": 2359296, "linear_attention_nnz": 2037760, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6564864, "linear_attention_total": 2359296, "linear_attention_nnz": 2056704, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6459904, "linear_attention_total": 2359296, "linear_attention_nnz": 2000896, "linear_dense_total": 4718592, "linear_dense_nnz": 4459008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6377728, "linear_attention_total": 2359296, "linear_attention_nnz": 1963264, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6199808, "linear_attention_total": 2359296, "linear_attention_nnz": 1899008, "linear_dense_total": 4718592, "linear_dense_nnz": 4300800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6070272, "linear_attention_total": 2359296, "linear_attention_nnz": 1938432, "linear_dense_total": 4718592, "linear_dense_nnz": 4131840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5774848, "linear_attention_total": 2359296, "linear_attention_nnz": 1905664, "linear_dense_total": 4718592, "linear_dense_nnz": 3869184}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5543168, "linear_attention_total": 2359296, "linear_attention_nnz": 1779968, "linear_dense_total": 4718592, "linear_dense_nnz": 3763200}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5265920, "linear_attention_total": 2359296, "linear_attention_nnz": 1662464, "linear_dense_total": 4718592, "linear_dense_nnz": 3603456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5705728, "linear_attention_total": 2359296, "linear_attention_nnz": 1722880, "linear_dense_total": 4718592, "linear_dense_nnz": 3982848}}, "total_sparsity": 10.685436276976967, "linear_sparsity": 13.699604552469136}, "speed": {"eval_elapsed_time": 39.34595324099064}, "opt_eval_metrics": {"exact_match": 70.2554399243141, "f1": 79.54398424308184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 44512539, "linear_total": 84934656, "linear_nnz": 20599296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869312, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1254912}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2022400, "linear_attention_total": 2359296, "linear_attention_nnz": 612352, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2259968, "linear_attention_total": 2359296, "linear_attention_nnz": 699392, "linear_dense_total": 4718592, "linear_dense_nnz": 1560576}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 1010688, "linear_dense_total": 4718592, "linear_dense_nnz": 1532928}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2463744, "linear_attention_total": 2359296, "linear_attention_nnz": 927744, "linear_dense_total": 4718592, "linear_dense_nnz": 1536000}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2345472, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 1473024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2036224, "linear_attention_total": 2359296, "linear_attention_nnz": 867328, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 897024, "linear_dense_total": 4718592, "linear_dense_nnz": 903168}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165824, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734208, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576512, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 261120, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}}, "total_sparsity": 59.12275080279128, "linear_sparsity": 75.7468894675926}, "speed": {"eval_elapsed_time": 17.345293765887618}, "opt_eval_metrics": {"exact_match": 79.36613055818354, "f1": 87.31339978481493}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 15.926922732032835}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.737465491052717}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.68077167298179}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.309701333986595}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.103150750976056}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.77731288096402}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.746100773918442}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.66016855603084}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.2053101999918}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.106899423059076}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.171043560025282}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 13.23735156096518}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.734316703979857}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.27580028004013}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.814438845962286}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.804757428006269}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.842320216004737}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 14.883674454060383}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.104866776964627}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50669453, "linear_total": 84934656, "linear_nnz": 26755584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3454464, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1979904}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2108928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3497472, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2317824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2995200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 2110464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2832384, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2528256, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2036736}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1631232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1691136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684992, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 374784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1082880, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}}, "total_sparsity": 53.46866515596302, "linear_sparsity": 68.4986255787037}, "speed": {"eval_elapsed_time": 15.358230478945188}, "opt_eval_metrics": {"exact_match": 78.12677388836329, "f1": 86.09062317714458}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.718609332921915}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.616545491036959}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.292588334996253}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 68.15529748401605, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.569498455966823}, "opt_eval_metrics": {"exact_match": 76.12109744560075, "f1": 84.59321000252827}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 12.87935333198402}, "opt_eval_metrics": {"exact_match": 76.2251655629139, "f1": 84.80214537282716}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.830008602933958}, "opt_eval_metrics": {"exact_match": 75.68590350047303, "f1": 84.47747389903205}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.832387157017365}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.47498508158148}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.66309662302956}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50793088999642}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 55275810, "linear_total": 84934656, "linear_nnz": 31358976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4016640, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 2542080}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340800, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2652672}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4039680, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2860032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4041216, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2763264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3187200, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2499072}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 2477568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2580480, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1990656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815552, "linear_attention_total": 2359296, "linear_attention_nnz": 245760, "linear_dense_total": 4718592, "linear_dense_nnz": 1569792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1893888, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 714240, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 517632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1577472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}}, "total_sparsity": 49.23850423478289, "linear_sparsity": 63.078703703703695}, "speed": {"eval_elapsed_time": 16.857338295085356}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.01032921346379}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.838669790071435}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.605739500955679}, "opt_eval_metrics": {"exact_match": 78.01324503311258, "f1": 85.85711399770457}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.334042510017753}, "opt_eval_metrics": {"exact_match": 78.02270577105014, "f1": 85.8869692285446}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 81.4040583850001, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 12.247032377053984}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 12.096938933013007}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 24.926402524928562}, "opt_eval_metrics": {"exact_match": 78.29706717123936, "f1": 86.2648683969933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.562090583960526}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.720699563040398}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.966550998971798}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.872020053910092}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 13.847230941057205}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 13.841004910878837}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 30.27796263305936}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 20.700779567006975}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.624390312936157}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.420848372974433}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.429447512025945}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.997670065960847}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.94853257900104}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results5.json b/analysis/files/results/results5.json deleted file mode 100644 index 83927515..00000000 --- a/analysis/files/results/results5.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 60368184, "linear_total": 84934656, "linear_nnz": 36440832, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788032, "linear_attention_total": 2359296, "linear_attention_nnz": 1170688, "linear_dense_total": 4718592, "linear_dense_nnz": 2617344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3460352, "linear_attention_total": 2359296, "linear_attention_nnz": 956672, "linear_dense_total": 4718592, "linear_dense_nnz": 2503680}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003072, "linear_attention_total": 2359296, "linear_attention_nnz": 1288960, "linear_dense_total": 4718592, "linear_dense_nnz": 2714112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4128768, "linear_attention_total": 2359296, "linear_attention_nnz": 1483776, "linear_dense_total": 4718592, "linear_dense_nnz": 2644992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4293120, "linear_attention_total": 2359296, "linear_attention_nnz": 1712640, "linear_dense_total": 4718592, "linear_dense_nnz": 2580480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4090112, "linear_attention_total": 2359296, "linear_attention_nnz": 1598720, "linear_dense_total": 4718592, "linear_dense_nnz": 2491392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1647872, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870272, "linear_attention_total": 2359296, "linear_attention_nnz": 1278976, "linear_dense_total": 4718592, "linear_dense_nnz": 1591296}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2240256, "linear_attention_total": 2359296, "linear_attention_nnz": 1321728, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 826112, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011456, "linear_attention_total": 2359296, "linear_attention_nnz": 676608, "linear_dense_total": 4718592, "linear_dense_nnz": 334848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562368, "linear_attention_total": 2359296, "linear_attention_nnz": 473344, "linear_dense_total": 4718592, "linear_dense_nnz": 1089024}}, "total_sparsity": 44.56201878416892, "linear_sparsity": 57.095449942129626}, "speed": {"eval_elapsed_time": 21.40440218592994}, "opt_eval_metrics": {"exact_match": 1.4853358561967833, "f1": 8.997331194701044}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 20.464980722172186}, "opt_eval_metrics": {"exact_match": 2.185430463576159, "f1": 10.235907731105511}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 20.669576363172382}, "opt_eval_metrics": {"exact_match": 1.8448438978240302, "f1": 9.536169896176048}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.96496795094572}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.375705623999238}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.870226074010134}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.235010980977677}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 16.02295208198484}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.927992683951743}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 16.204893412068486}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 16.045786170987412}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.8394883510191}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.605650334036909}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 20.12763703102246}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 69.66989313997328, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-45000": {"stats": {"total": 108893186, "nnz": 52655769, "linear_total": 84934656, "linear_nnz": 28740096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3704832, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 2131968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818560, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2327040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3674112, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2494464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592704, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2314752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2942976, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844672, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738752, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665024, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 878592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 402432}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1440768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 949248}}, "total_sparsity": 51.64456938563631, "linear_sparsity": 66.162109375}, "speed": {"eval_elapsed_time": 15.83343747886829}, "opt_eval_metrics": {"exact_match": 78.1929990539262, "f1": 85.92206431273945}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 47478801, "linear_total": 84934656, "linear_nnz": 23566848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2959872, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1485312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2121216, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1728000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3085824, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1906176}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1806336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422272, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1734144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1712640}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1288704}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494528, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 708096}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 772608}}, "total_sparsity": 56.39874013788153, "linear_sparsity": 72.2529658564815}, "speed": {"eval_elapsed_time": 14.195255008991808}, "opt_eval_metrics": {"exact_match": 77.69157994323557, "f1": 85.75507572992562}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.292132368078455}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 71.46695366402855, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-50000": {"stats": {"total": 108893186, "nnz": 40770050, "linear_total": 84934656, "linear_nnz": 16811520, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190336, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 855552}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2171904, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1900032, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717248, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1029120}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1319424, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 729600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030656, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 637440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1179648, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 393216}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 715776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 224256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}}, "total_sparsity": 62.55959486757969, "linear_sparsity": 80.20652488425925}, "speed": {"eval_elapsed_time": 12.685803183936514}, "opt_eval_metrics": {"exact_match": 75.79943235572375, "f1": 84.3797785815339}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.091999777941965}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.029475754010491}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.409963917918503}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 19.07873147400096}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.581735570915043}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-25000": {"stats": {"total": 108893186, "nnz": 97257474, "linear_total": 84934656, "linear_nnz": 73298944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6452736, "linear_attention_total": 2359296, "linear_attention_nnz": 1889280, "linear_dense_total": 4718592, "linear_dense_nnz": 4563456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6338048, "linear_attention_total": 2359296, "linear_attention_nnz": 1839104, "linear_dense_total": 4718592, "linear_dense_nnz": 4498944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6545920, "linear_attention_total": 2359296, "linear_attention_nnz": 2037760, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6564864, "linear_attention_total": 2359296, "linear_attention_nnz": 2056704, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6459904, "linear_attention_total": 2359296, "linear_attention_nnz": 2000896, "linear_dense_total": 4718592, "linear_dense_nnz": 4459008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6377728, "linear_attention_total": 2359296, "linear_attention_nnz": 1963264, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6199808, "linear_attention_total": 2359296, "linear_attention_nnz": 1899008, "linear_dense_total": 4718592, "linear_dense_nnz": 4300800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6070272, "linear_attention_total": 2359296, "linear_attention_nnz": 1938432, "linear_dense_total": 4718592, "linear_dense_nnz": 4131840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5774848, "linear_attention_total": 2359296, "linear_attention_nnz": 1905664, "linear_dense_total": 4718592, "linear_dense_nnz": 3869184}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5543168, "linear_attention_total": 2359296, "linear_attention_nnz": 1779968, "linear_dense_total": 4718592, "linear_dense_nnz": 3763200}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5265920, "linear_attention_total": 2359296, "linear_attention_nnz": 1662464, "linear_dense_total": 4718592, "linear_dense_nnz": 3603456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5705728, "linear_attention_total": 2359296, "linear_attention_nnz": 1722880, "linear_dense_total": 4718592, "linear_dense_nnz": 3982848}}, "total_sparsity": 10.685436276976967, "linear_sparsity": 13.699604552469136}, "speed": {"eval_elapsed_time": 39.34595324099064}, "opt_eval_metrics": {"exact_match": 70.2554399243141, "f1": 79.54398424308184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 19.1117691679392}, "opt_eval_metrics": {"exact_match": 3.5383159886471143, "f1": 14.801770078824811}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 18.703878964995965}, "opt_eval_metrics": {"exact_match": 4.985808893093662, "f1": 15.445793560425729}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 18.49347779387608}, "opt_eval_metrics": {"exact_match": 3.869441816461684, "f1": 14.997695992590211}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 14.250905813882127}, "opt_eval_metrics": {"exact_match": 2.251655629139073, "f1": 12.738616532125459}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 13.911966318031773}, "opt_eval_metrics": {"exact_match": 2.346263008514664, "f1": 13.343923627183553}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 13.945766709977761}, "opt_eval_metrics": {"exact_match": 1.8826868495742668, "f1": 12.787819348714812}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 44512539, "linear_total": 84934656, "linear_nnz": 20599296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869312, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1254912}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2022400, "linear_attention_total": 2359296, "linear_attention_nnz": 612352, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2259968, "linear_attention_total": 2359296, "linear_attention_nnz": 699392, "linear_dense_total": 4718592, "linear_dense_nnz": 1560576}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 1010688, "linear_dense_total": 4718592, "linear_dense_nnz": 1532928}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2463744, "linear_attention_total": 2359296, "linear_attention_nnz": 927744, "linear_dense_total": 4718592, "linear_dense_nnz": 1536000}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2345472, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 1473024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2036224, "linear_attention_total": 2359296, "linear_attention_nnz": 867328, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 897024, "linear_dense_total": 4718592, "linear_dense_nnz": 903168}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165824, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734208, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576512, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 261120, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}}, "total_sparsity": 59.12275080279128, "linear_sparsity": 75.7468894675926}, "speed": {"eval_elapsed_time": 17.345293765887618}, "opt_eval_metrics": {"exact_match": 79.36613055818354, "f1": 87.31339978481493}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 15.926922732032835}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.737465491052717}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.68077167298179}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.309701333986595}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.103150750976056}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.77731288096402}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.746100773918442}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.66016855603084}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.2053101999918}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.106899423059076}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.171043560025282}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 13.23735156096518}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.734316703979857}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 38322466, "linear_total": 84934656, "linear_nnz": 14411776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015808, "linear_attention_total": 2359296, "linear_attention_nnz": 498688, "linear_dense_total": 4718592, "linear_dense_nnz": 517120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1176576, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 665600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846272, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1137664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953792, "linear_attention_total": 2359296, "linear_attention_nnz": 832512, "linear_dense_total": 4718592, "linear_dense_nnz": 1121280}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854464, "linear_attention_total": 2359296, "linear_attention_nnz": 739328, "linear_dense_total": 4718592, "linear_dense_nnz": 1115136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754112, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1177600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522688, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 920576}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1283072, "linear_attention_total": 2359296, "linear_attention_nnz": 728064, "linear_dense_total": 4718592, "linear_dense_nnz": 555008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 641024, "linear_attention_total": 2359296, "linear_attention_nnz": 465920, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496640, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 185344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 199680, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}}, "total_sparsity": 64.80728739078312, "linear_sparsity": 83.03192515432099}, "speed": {"eval_elapsed_time": 15.816457642940804}, "opt_eval_metrics": {"exact_match": 9.403973509933774, "f1": 21.602666371212333}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 37814786, "linear_total": 84934656, "linear_nnz": 13904896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 974848, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1168384, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829888, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1119232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906688, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 1054720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1058816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684480, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 1128448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1434624, "linear_attention_total": 2359296, "linear_attention_nnz": 569344, "linear_dense_total": 4718592, "linear_dense_nnz": 865280}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205248, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 635904, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 347136, "linear_dense_total": 4718592, "linear_dense_nnz": 82944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 189440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 191488}}, "total_sparsity": 65.2735057269791, "linear_sparsity": 83.62871334876543}, "speed": {"eval_elapsed_time": 15.455383451189846}, "opt_eval_metrics": {"exact_match": 13.330179754020813, "f1": 25.30359311737543}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36247394, "linear_total": 84934656, "linear_nnz": 12339200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 404480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1039360, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 550912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1001472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1664000, "linear_attention_total": 2359296, "linear_attention_nnz": 775168, "linear_dense_total": 4718592, "linear_dense_nnz": 888832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1506304, "linear_attention_total": 2359296, "linear_attention_nnz": 564224, "linear_dense_total": 4718592, "linear_dense_nnz": 942080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1456128, "linear_attention_total": 2359296, "linear_attention_nnz": 494592, "linear_dense_total": 4718592, "linear_dense_nnz": 961536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1302528, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 790528}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 419840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 414720, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393216, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 344064, "linear_attention_total": 2359296, "linear_attention_nnz": 153600, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}}, "total_sparsity": 66.71289055680674, "linear_sparsity": 85.47212577160494}, "speed": {"eval_elapsed_time": 15.292296970030293}, "opt_eval_metrics": {"exact_match": 12.620624408703879, "f1": 24.953838930607546}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.27580028004013}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.814438845962286}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.804757428006269}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.842320216004737}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 14.883674454060383}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.104866776964627}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50669453, "linear_total": 84934656, "linear_nnz": 26755584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3454464, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1979904}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2108928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3497472, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2317824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2995200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 2110464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2832384, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2528256, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2036736}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1631232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1691136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684992, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 374784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1082880, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}}, "total_sparsity": 53.46866515596302, "linear_sparsity": 68.4986255787037}, "speed": {"eval_elapsed_time": 15.358230478945188}, "opt_eval_metrics": {"exact_match": 78.12677388836329, "f1": 86.09062317714458}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.718609332921915}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.616545491036959}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.292588334996253}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 68.15529748401605, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.569498455966823}, "opt_eval_metrics": {"exact_match": 76.12109744560075, "f1": 84.59321000252827}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 12.87935333198402}, "opt_eval_metrics": {"exact_match": 76.2251655629139, "f1": 84.80214537282716}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.830008602933958}, "opt_eval_metrics": {"exact_match": 75.68590350047303, "f1": 84.47747389903205}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.832387157017365}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.47498508158148}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.66309662302956}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50793088999642}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 14.564574090065435}, "opt_eval_metrics": {"exact_match": 3.9829706717123936, "f1": 16.040742076098137}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 14.267318818019703}, "opt_eval_metrics": {"exact_match": 3.8883632923368023, "f1": 15.870241243967634}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 14.267447887919843}, "opt_eval_metrics": {"exact_match": 4.768211920529802, "f1": 16.20417331173374}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 14.274685407988727}, "opt_eval_metrics": {"exact_match": 3.8315988647114474, "f1": 15.72666349553447}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 16.048874086001888}, "opt_eval_metrics": {"exact_match": 2.686849574266793, "f1": 12.912101470328441}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 55275810, "linear_total": 84934656, "linear_nnz": 31358976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4016640, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 2542080}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340800, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2652672}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4039680, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2860032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4041216, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2763264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3187200, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2499072}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 2477568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2580480, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1990656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815552, "linear_attention_total": 2359296, "linear_attention_nnz": 245760, "linear_dense_total": 4718592, "linear_dense_nnz": 1569792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1893888, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 714240, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 517632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1577472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}}, "total_sparsity": 49.23850423478289, "linear_sparsity": 63.078703703703695}, "speed": {"eval_elapsed_time": 16.857338295085356}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.01032921346379}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.838669790071435}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.605739500955679}, "opt_eval_metrics": {"exact_match": 78.01324503311258, "f1": 85.85711399770457}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.334042510017753}, "opt_eval_metrics": {"exact_match": 78.02270577105014, "f1": 85.8869692285446}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 70240546, "linear_total": 84934656, "linear_nnz": 46302208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4364288, "linear_attention_total": 2359296, "linear_attention_nnz": 770048, "linear_dense_total": 4718592, "linear_dense_nnz": 3594240}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4529152, "linear_attention_total": 2359296, "linear_attention_nnz": 724992, "linear_dense_total": 4718592, "linear_dense_nnz": 3804160}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5151744, "linear_attention_total": 2359296, "linear_attention_nnz": 1142784, "linear_dense_total": 4718592, "linear_dense_nnz": 4008960}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5341184, "linear_attention_total": 2359296, "linear_attention_nnz": 1333248, "linear_dense_total": 4718592, "linear_dense_nnz": 4007936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5542912, "linear_attention_total": 2359296, "linear_attention_nnz": 1481728, "linear_dense_total": 4718592, "linear_dense_nnz": 4061184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5168128, "linear_attention_total": 2359296, "linear_attention_nnz": 1220608, "linear_dense_total": 4718592, "linear_dense_nnz": 3947520}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5190656, "linear_attention_total": 2359296, "linear_attention_nnz": 1311744, "linear_dense_total": 4718592, "linear_dense_nnz": 3878912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4362240, "linear_attention_total": 2359296, "linear_attention_nnz": 1070080, "linear_dense_total": 4718592, "linear_dense_nnz": 3292160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2987008, "linear_attention_total": 2359296, "linear_attention_nnz": 1006592, "linear_dense_total": 4718592, "linear_dense_nnz": 1980416}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 684032, "linear_dense_total": 4718592, "linear_dense_nnz": 753664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193984, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 622592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033216, "linear_attention_total": 2359296, "linear_attention_nnz": 368640, "linear_dense_total": 4718592, "linear_dense_nnz": 664576}}, "total_sparsity": 35.49592166400568, "linear_sparsity": 45.48490547839506}, "speed": {"eval_elapsed_time": 18.76606000494212}, "opt_eval_metrics": {"exact_match": 1.6556291390728477, "f1": 9.690749776755068}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 17.513682276010513}, "opt_eval_metrics": {"exact_match": 2.2327341532639546, "f1": 11.393739680219062}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 81.4040583850001, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 12.247032377053984}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 12.096938933013007}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 24.926402524928562}, "opt_eval_metrics": {"exact_match": 78.29706717123936, "f1": 86.2648683969933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.562090583960526}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.720699563040398}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.966550998971798}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.872020053910092}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 13.847230941057205}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 13.841004910878837}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.929598903981969}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 30.27796263305936}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 20.700779567006975}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.624390312936157}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.420848372974433}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.429447512025945}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.997670065960847}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.94853257900104}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results6.json b/analysis/files/results/results6.json deleted file mode 100644 index dc0dbe0d..00000000 --- a/analysis/files/results/results6.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 60368184, "linear_total": 84934656, "linear_nnz": 36440832, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788032, "linear_attention_total": 2359296, "linear_attention_nnz": 1170688, "linear_dense_total": 4718592, "linear_dense_nnz": 2617344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3460352, "linear_attention_total": 2359296, "linear_attention_nnz": 956672, "linear_dense_total": 4718592, "linear_dense_nnz": 2503680}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003072, "linear_attention_total": 2359296, "linear_attention_nnz": 1288960, "linear_dense_total": 4718592, "linear_dense_nnz": 2714112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4128768, "linear_attention_total": 2359296, "linear_attention_nnz": 1483776, "linear_dense_total": 4718592, "linear_dense_nnz": 2644992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4293120, "linear_attention_total": 2359296, "linear_attention_nnz": 1712640, "linear_dense_total": 4718592, "linear_dense_nnz": 2580480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4090112, "linear_attention_total": 2359296, "linear_attention_nnz": 1598720, "linear_dense_total": 4718592, "linear_dense_nnz": 2491392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1647872, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870272, "linear_attention_total": 2359296, "linear_attention_nnz": 1278976, "linear_dense_total": 4718592, "linear_dense_nnz": 1591296}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2240256, "linear_attention_total": 2359296, "linear_attention_nnz": 1321728, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 826112, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011456, "linear_attention_total": 2359296, "linear_attention_nnz": 676608, "linear_dense_total": 4718592, "linear_dense_nnz": 334848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562368, "linear_attention_total": 2359296, "linear_attention_nnz": 473344, "linear_dense_total": 4718592, "linear_dense_nnz": 1089024}}, "total_sparsity": 44.56201878416892, "linear_sparsity": 57.095449942129626}, "speed": {"eval_elapsed_time": 21.40440218592994}, "opt_eval_metrics": {"exact_match": 1.4853358561967833, "f1": 8.997331194701044}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 20.464980722172186}, "opt_eval_metrics": {"exact_match": 2.185430463576159, "f1": 10.235907731105511}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 20.669576363172382}, "opt_eval_metrics": {"exact_match": 1.8448438978240302, "f1": 9.536169896176048}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 16.686416601995006}, "opt_eval_metrics": {"exact_match": 0.1608325449385052, "f1": 6.123784219743515}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 15.695100409910083}, "opt_eval_metrics": {"exact_match": 0.20813623462630085, "f1": 6.56563080893611}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 15.712638786062598}, "opt_eval_metrics": {"exact_match": 0.20813623462630085, "f1": 6.528119937113851}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 19.1518919239752}, "opt_eval_metrics": {"exact_match": 0.11352885525070956, "f1": 5.85913997406195}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 19.15879200794734}, "opt_eval_metrics": {"exact_match": 0.15137180700094607, "f1": 5.769703802205948}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 17.340857987990603}, "opt_eval_metrics": {"exact_match": 0.14191106906338694, "f1": 6.406978964749263}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 16.706481732893735}, "opt_eval_metrics": {"exact_match": 0.21759697256385999, "f1": 6.811902674676711}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 15.45761635596864}, "opt_eval_metrics": {"exact_match": 0.11352885525070956, "f1": 6.223158792862346}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 15.454639408970252}, "opt_eval_metrics": {"exact_match": 0.1608325449385052, "f1": 6.3945606916217175}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 15.469862014055252}, "opt_eval_metrics": {"exact_match": 0.17029328287606432, "f1": 6.294767395480602}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 17.011177288135514}, "opt_eval_metrics": {"exact_match": 0.21759697256385999, "f1": 6.004282313368687}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 17.041652875952423}, "opt_eval_metrics": {"exact_match": 0.23651844843897823, "f1": 5.845883008466231}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 17.044327389914542}, "opt_eval_metrics": {"exact_match": 0.21759697256385999, "f1": 5.825634861647094}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 21.49158539599739}, "opt_eval_metrics": {"exact_match": 0.10406811731315042, "f1": 4.938074420436028}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 14.730565361911431}, "opt_eval_metrics": {"exact_match": 0.17975402081362346, "f1": 6.775574782409124}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.96496795094572}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 19.215512146009132}, "opt_eval_metrics": {"exact_match": 0.11352885525070956, "f1": 5.23316199013419}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.375705623999238}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.870226074010134}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.235010980977677}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 16.02295208198484}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.927992683951743}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 16.204893412068486}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 16.045786170987412}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.8394883510191}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.605650334036909}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 20.12763703102246}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 69.66989313997328, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-45000": {"stats": {"total": 108893186, "nnz": 52655769, "linear_total": 84934656, "linear_nnz": 28740096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3704832, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 2131968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818560, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2327040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3674112, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2494464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592704, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2314752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2942976, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844672, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738752, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665024, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 878592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 402432}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1440768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 949248}}, "total_sparsity": 51.64456938563631, "linear_sparsity": 66.162109375}, "speed": {"eval_elapsed_time": 15.83343747886829}, "opt_eval_metrics": {"exact_match": 78.1929990539262, "f1": 85.92206431273945}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 47478801, "linear_total": 84934656, "linear_nnz": 23566848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2959872, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1485312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2121216, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1728000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3085824, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1906176}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1806336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422272, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1734144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1712640}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1288704}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494528, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 708096}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 772608}}, "total_sparsity": 56.39874013788153, "linear_sparsity": 72.2529658564815}, "speed": {"eval_elapsed_time": 14.195255008991808}, "opt_eval_metrics": {"exact_match": 77.69157994323557, "f1": 85.75507572992562}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.292132368078455}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 71.46695366402855, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-35000": {"stats": {"total": 108893186, "nnz": 72867394, "linear_total": 84934656, "linear_nnz": 48920576, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4819968, "linear_attention_total": 2359296, "linear_attention_nnz": 804864, "linear_dense_total": 4718592, "linear_dense_nnz": 4015104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4609024, "linear_attention_total": 2359296, "linear_attention_nnz": 636928, "linear_dense_total": 4718592, "linear_dense_nnz": 3972096}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4993024, "linear_attention_total": 2359296, "linear_attention_nnz": 959488, "linear_dense_total": 4718592, "linear_dense_nnz": 4033536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5082112, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 4073472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5051392, "linear_attention_total": 2359296, "linear_attention_nnz": 1057792, "linear_dense_total": 4718592, "linear_dense_nnz": 3993600}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4961280, "linear_attention_total": 2359296, "linear_attention_nnz": 987136, "linear_dense_total": 4718592, "linear_dense_nnz": 3974144}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4688896, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 3772416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4421632, "linear_attention_total": 2359296, "linear_attention_nnz": 822272, "linear_dense_total": 4718592, "linear_dense_nnz": 3599360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3827712, "linear_attention_total": 2359296, "linear_attention_nnz": 719872, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2715648, "linear_attention_total": 2359296, "linear_attention_nnz": 463872, "linear_dense_total": 4718592, "linear_dense_nnz": 2251776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2140160, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1609728, "linear_attention_total": 2359296, "linear_attention_nnz": 190464, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}}, "total_sparsity": 33.08360543330967, "linear_sparsity": 42.40210262345679}, "speed": {"eval_elapsed_time": 19.48546407208778}, "opt_eval_metrics": {"exact_match": 0.23651844843897823, "f1": 7.038988743323127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 15.847133659990504}, "opt_eval_metrics": {"exact_match": 11.031220435193944, "f1": 22.56529237758617}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 15.823482112959027}, "opt_eval_metrics": {"exact_match": 10.331125827814569, "f1": 21.308966277221426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-50000": {"stats": {"total": 108893186, "nnz": 40770050, "linear_total": 84934656, "linear_nnz": 16811520, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190336, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 855552}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2171904, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1900032, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717248, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1029120}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1319424, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 729600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030656, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 637440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1179648, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 393216}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 715776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 224256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}}, "total_sparsity": 62.55959486757969, "linear_sparsity": 80.20652488425925}, "speed": {"eval_elapsed_time": 12.685803183936514}, "opt_eval_metrics": {"exact_match": 75.79943235572375, "f1": 84.3797785815339}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.091999777941965}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.029475754010491}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.409963917918503}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 19.07873147400096}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.581735570915043}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-25000": {"stats": {"total": 108893186, "nnz": 97257474, "linear_total": 84934656, "linear_nnz": 73298944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6452736, "linear_attention_total": 2359296, "linear_attention_nnz": 1889280, "linear_dense_total": 4718592, "linear_dense_nnz": 4563456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6338048, "linear_attention_total": 2359296, "linear_attention_nnz": 1839104, "linear_dense_total": 4718592, "linear_dense_nnz": 4498944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6545920, "linear_attention_total": 2359296, "linear_attention_nnz": 2037760, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6564864, "linear_attention_total": 2359296, "linear_attention_nnz": 2056704, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6459904, "linear_attention_total": 2359296, "linear_attention_nnz": 2000896, "linear_dense_total": 4718592, "linear_dense_nnz": 4459008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6377728, "linear_attention_total": 2359296, "linear_attention_nnz": 1963264, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6199808, "linear_attention_total": 2359296, "linear_attention_nnz": 1899008, "linear_dense_total": 4718592, "linear_dense_nnz": 4300800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6070272, "linear_attention_total": 2359296, "linear_attention_nnz": 1938432, "linear_dense_total": 4718592, "linear_dense_nnz": 4131840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5774848, "linear_attention_total": 2359296, "linear_attention_nnz": 1905664, "linear_dense_total": 4718592, "linear_dense_nnz": 3869184}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5543168, "linear_attention_total": 2359296, "linear_attention_nnz": 1779968, "linear_dense_total": 4718592, "linear_dense_nnz": 3763200}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5265920, "linear_attention_total": 2359296, "linear_attention_nnz": 1662464, "linear_dense_total": 4718592, "linear_dense_nnz": 3603456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5705728, "linear_attention_total": 2359296, "linear_attention_nnz": 1722880, "linear_dense_total": 4718592, "linear_dense_nnz": 3982848}}, "total_sparsity": 10.685436276976967, "linear_sparsity": 13.699604552469136}, "speed": {"eval_elapsed_time": 39.34595324099064}, "opt_eval_metrics": {"exact_match": 70.2554399243141, "f1": 79.54398424308184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 19.1117691679392}, "opt_eval_metrics": {"exact_match": 3.5383159886471143, "f1": 14.801770078824811}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 18.703878964995965}, "opt_eval_metrics": {"exact_match": 4.985808893093662, "f1": 15.445793560425729}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 18.49347779387608}, "opt_eval_metrics": {"exact_match": 3.869441816461684, "f1": 14.997695992590211}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 14.250905813882127}, "opt_eval_metrics": {"exact_match": 2.251655629139073, "f1": 12.738616532125459}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 13.911966318031773}, "opt_eval_metrics": {"exact_match": 2.346263008514664, "f1": 13.343923627183553}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 13.945766709977761}, "opt_eval_metrics": {"exact_match": 1.8826868495742668, "f1": 12.787819348714812}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 44512539, "linear_total": 84934656, "linear_nnz": 20599296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869312, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1254912}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2022400, "linear_attention_total": 2359296, "linear_attention_nnz": 612352, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2259968, "linear_attention_total": 2359296, "linear_attention_nnz": 699392, "linear_dense_total": 4718592, "linear_dense_nnz": 1560576}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 1010688, "linear_dense_total": 4718592, "linear_dense_nnz": 1532928}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2463744, "linear_attention_total": 2359296, "linear_attention_nnz": 927744, "linear_dense_total": 4718592, "linear_dense_nnz": 1536000}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2345472, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 1473024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2036224, "linear_attention_total": 2359296, "linear_attention_nnz": 867328, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 897024, "linear_dense_total": 4718592, "linear_dense_nnz": 903168}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165824, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734208, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576512, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 261120, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}}, "total_sparsity": 59.12275080279128, "linear_sparsity": 75.7468894675926}, "speed": {"eval_elapsed_time": 17.345293765887618}, "opt_eval_metrics": {"exact_match": 79.36613055818354, "f1": 87.31339978481493}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 15.926922732032835}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.737465491052717}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.68077167298179}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.309701333986595}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.103150750976056}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.77731288096402}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.746100773918442}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.66016855603084}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.2053101999918}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.106899423059076}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.171043560025282}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 13.23735156096518}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.734316703979857}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 38322466, "linear_total": 84934656, "linear_nnz": 14411776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015808, "linear_attention_total": 2359296, "linear_attention_nnz": 498688, "linear_dense_total": 4718592, "linear_dense_nnz": 517120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1176576, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 665600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846272, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1137664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953792, "linear_attention_total": 2359296, "linear_attention_nnz": 832512, "linear_dense_total": 4718592, "linear_dense_nnz": 1121280}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854464, "linear_attention_total": 2359296, "linear_attention_nnz": 739328, "linear_dense_total": 4718592, "linear_dense_nnz": 1115136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754112, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1177600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522688, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 920576}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1283072, "linear_attention_total": 2359296, "linear_attention_nnz": 728064, "linear_dense_total": 4718592, "linear_dense_nnz": 555008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 641024, "linear_attention_total": 2359296, "linear_attention_nnz": 465920, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496640, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 185344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 199680, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}}, "total_sparsity": 64.80728739078312, "linear_sparsity": 83.03192515432099}, "speed": {"eval_elapsed_time": 15.816457642940804}, "opt_eval_metrics": {"exact_match": 9.403973509933774, "f1": 21.602666371212333}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 37814786, "linear_total": 84934656, "linear_nnz": 13904896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 974848, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1168384, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829888, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1119232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906688, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 1054720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1058816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684480, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 1128448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1434624, "linear_attention_total": 2359296, "linear_attention_nnz": 569344, "linear_dense_total": 4718592, "linear_dense_nnz": 865280}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205248, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 635904, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 347136, "linear_dense_total": 4718592, "linear_dense_nnz": 82944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 189440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 191488}}, "total_sparsity": 65.2735057269791, "linear_sparsity": 83.62871334876543}, "speed": {"eval_elapsed_time": 15.455383451189846}, "opt_eval_metrics": {"exact_match": 13.330179754020813, "f1": 25.30359311737543}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36247394, "linear_total": 84934656, "linear_nnz": 12339200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 404480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1039360, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 550912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1001472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1664000, "linear_attention_total": 2359296, "linear_attention_nnz": 775168, "linear_dense_total": 4718592, "linear_dense_nnz": 888832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1506304, "linear_attention_total": 2359296, "linear_attention_nnz": 564224, "linear_dense_total": 4718592, "linear_dense_nnz": 942080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1456128, "linear_attention_total": 2359296, "linear_attention_nnz": 494592, "linear_dense_total": 4718592, "linear_dense_nnz": 961536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1302528, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 790528}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 419840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 414720, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393216, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 344064, "linear_attention_total": 2359296, "linear_attention_nnz": 153600, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}}, "total_sparsity": 66.71289055680674, "linear_sparsity": 85.47212577160494}, "speed": {"eval_elapsed_time": 15.292296970030293}, "opt_eval_metrics": {"exact_match": 12.620624408703879, "f1": 24.953838930607546}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.27580028004013}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.814438845962286}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.804757428006269}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.842320216004737}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 14.883674454060383}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.104866776964627}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50669453, "linear_total": 84934656, "linear_nnz": 26755584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3454464, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1979904}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2108928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3497472, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2317824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2995200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 2110464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2832384, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2528256, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2036736}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1631232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1691136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684992, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 374784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1082880, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}}, "total_sparsity": 53.46866515596302, "linear_sparsity": 68.4986255787037}, "speed": {"eval_elapsed_time": 15.358230478945188}, "opt_eval_metrics": {"exact_match": 78.12677388836329, "f1": 86.09062317714458}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.718609332921915}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.616545491036959}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.292588334996253}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 68.15529748401605, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.569498455966823}, "opt_eval_metrics": {"exact_match": 76.12109744560075, "f1": 84.59321000252827}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 12.87935333198402}, "opt_eval_metrics": {"exact_match": 76.2251655629139, "f1": 84.80214537282716}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.830008602933958}, "opt_eval_metrics": {"exact_match": 75.68590350047303, "f1": 84.47747389903205}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.832387157017365}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.47498508158148}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.66309662302956}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50793088999642}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 14.564574090065435}, "opt_eval_metrics": {"exact_match": 3.9829706717123936, "f1": 16.040742076098137}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 14.267318818019703}, "opt_eval_metrics": {"exact_match": 3.8883632923368023, "f1": 15.870241243967634}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 14.267447887919843}, "opt_eval_metrics": {"exact_match": 4.768211920529802, "f1": 16.20417331173374}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 14.274685407988727}, "opt_eval_metrics": {"exact_match": 3.8315988647114474, "f1": 15.72666349553447}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 16.048874086001888}, "opt_eval_metrics": {"exact_match": 2.686849574266793, "f1": 12.912101470328441}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 55275810, "linear_total": 84934656, "linear_nnz": 31358976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4016640, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 2542080}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340800, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2652672}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4039680, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2860032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4041216, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2763264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3187200, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2499072}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 2477568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2580480, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1990656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815552, "linear_attention_total": 2359296, "linear_attention_nnz": 245760, "linear_dense_total": 4718592, "linear_dense_nnz": 1569792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1893888, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 714240, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 517632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1577472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}}, "total_sparsity": 49.23850423478289, "linear_sparsity": 63.078703703703695}, "speed": {"eval_elapsed_time": 16.857338295085356}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.01032921346379}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.838669790071435}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50726818, "linear_total": 84934656, "linear_nnz": 26803200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2117632, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 1359872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1583104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3328000, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 2427904}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3734528, "linear_attention_total": 2359296, "linear_attention_nnz": 1215488, "linear_dense_total": 4718592, "linear_dense_nnz": 2519040}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3650560, "linear_attention_total": 2359296, "linear_attention_nnz": 1223680, "linear_dense_total": 4718592, "linear_dense_nnz": 2426880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3445760, "linear_attention_total": 2359296, "linear_attention_nnz": 1035264, "linear_dense_total": 4718592, "linear_dense_nnz": 2410496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2830336, "linear_attention_total": 2359296, "linear_attention_nnz": 1011712, "linear_dense_total": 4718592, "linear_dense_nnz": 1818624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2111488, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1195008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1225728, "linear_attention_total": 2359296, "linear_attention_nnz": 860160, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765952, "linear_attention_total": 2359296, "linear_attention_nnz": 603136, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 745472, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 302080, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}}, "total_sparsity": 53.41598509203321, "linear_sparsity": 68.4425636574074}, "speed": {"eval_elapsed_time": 17.367716077016667}, "opt_eval_metrics": {"exact_match": 2.961210974456008, "f1": 12.790407516365663}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 16.936548358993605}, "opt_eval_metrics": {"exact_match": 6.556291390728477, "f1": 18.823302848463506}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 16.834375079954043}, "opt_eval_metrics": {"exact_match": 6.0927152317880795, "f1": 17.643856165589675}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.605739500955679}, "opt_eval_metrics": {"exact_match": 78.01324503311258, "f1": 85.85711399770457}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.334042510017753}, "opt_eval_metrics": {"exact_match": 78.02270577105014, "f1": 85.8869692285446}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 70240546, "linear_total": 84934656, "linear_nnz": 46302208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4364288, "linear_attention_total": 2359296, "linear_attention_nnz": 770048, "linear_dense_total": 4718592, "linear_dense_nnz": 3594240}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4529152, "linear_attention_total": 2359296, "linear_attention_nnz": 724992, "linear_dense_total": 4718592, "linear_dense_nnz": 3804160}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5151744, "linear_attention_total": 2359296, "linear_attention_nnz": 1142784, "linear_dense_total": 4718592, "linear_dense_nnz": 4008960}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5341184, "linear_attention_total": 2359296, "linear_attention_nnz": 1333248, "linear_dense_total": 4718592, "linear_dense_nnz": 4007936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5542912, "linear_attention_total": 2359296, "linear_attention_nnz": 1481728, "linear_dense_total": 4718592, "linear_dense_nnz": 4061184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5168128, "linear_attention_total": 2359296, "linear_attention_nnz": 1220608, "linear_dense_total": 4718592, "linear_dense_nnz": 3947520}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5190656, "linear_attention_total": 2359296, "linear_attention_nnz": 1311744, "linear_dense_total": 4718592, "linear_dense_nnz": 3878912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4362240, "linear_attention_total": 2359296, "linear_attention_nnz": 1070080, "linear_dense_total": 4718592, "linear_dense_nnz": 3292160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2987008, "linear_attention_total": 2359296, "linear_attention_nnz": 1006592, "linear_dense_total": 4718592, "linear_dense_nnz": 1980416}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 684032, "linear_dense_total": 4718592, "linear_dense_nnz": 753664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193984, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 622592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033216, "linear_attention_total": 2359296, "linear_attention_nnz": 368640, "linear_dense_total": 4718592, "linear_dense_nnz": 664576}}, "total_sparsity": 35.49592166400568, "linear_sparsity": 45.48490547839506}, "speed": {"eval_elapsed_time": 18.76606000494212}, "opt_eval_metrics": {"exact_match": 1.6556291390728477, "f1": 9.690749776755068}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 17.513682276010513}, "opt_eval_metrics": {"exact_match": 2.2327341532639546, "f1": 11.393739680219062}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 81.4040583850001, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 12.247032377053984}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 12.096938933013007}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 24.926402524928562}, "opt_eval_metrics": {"exact_match": 78.29706717123936, "f1": 86.2648683969933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.562090583960526}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.720699563040398}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.966550998971798}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.872020053910092}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 13.847230941057205}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 13.841004910878837}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.929598903981969}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 30.27796263305936}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 20.700779567006975}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.624390312936157}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.420848372974433}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.429447512025945}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.997670065960847}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.94853257900104}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results8.json b/analysis/files/results/results8.json deleted file mode 100644 index 0b89e8b3..00000000 --- a/analysis/files/results/results8.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 60368184, "linear_total": 84934656, "linear_nnz": 36440832, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788032, "linear_attention_total": 2359296, "linear_attention_nnz": 1170688, "linear_dense_total": 4718592, "linear_dense_nnz": 2617344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3460352, "linear_attention_total": 2359296, "linear_attention_nnz": 956672, "linear_dense_total": 4718592, "linear_dense_nnz": 2503680}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003072, "linear_attention_total": 2359296, "linear_attention_nnz": 1288960, "linear_dense_total": 4718592, "linear_dense_nnz": 2714112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4128768, "linear_attention_total": 2359296, "linear_attention_nnz": 1483776, "linear_dense_total": 4718592, "linear_dense_nnz": 2644992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4293120, "linear_attention_total": 2359296, "linear_attention_nnz": 1712640, "linear_dense_total": 4718592, "linear_dense_nnz": 2580480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4090112, "linear_attention_total": 2359296, "linear_attention_nnz": 1598720, "linear_dense_total": 4718592, "linear_dense_nnz": 2491392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1647872, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870272, "linear_attention_total": 2359296, "linear_attention_nnz": 1278976, "linear_dense_total": 4718592, "linear_dense_nnz": 1591296}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2240256, "linear_attention_total": 2359296, "linear_attention_nnz": 1321728, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 826112, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011456, "linear_attention_total": 2359296, "linear_attention_nnz": 676608, "linear_dense_total": 4718592, "linear_dense_nnz": 334848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562368, "linear_attention_total": 2359296, "linear_attention_nnz": 473344, "linear_dense_total": 4718592, "linear_dense_nnz": 1089024}}, "total_sparsity": 44.56201878416892, "linear_sparsity": 57.095449942129626}, "speed": {"eval_elapsed_time": 29.717269155895337}, "opt_eval_metrics": {"exact_match": 81.05960264900662, "f1": 88.35100701142292}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 27.21409681579098}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 27.214215133106336}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 32.5851653709542}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 30.32163338200189}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 30.31158491410315}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 34.96877746190876}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 34.96654323185794}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 31.76222953083925}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 31.761109228944406}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 31.13649192615412}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 31.135055932216346}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 31.128417774103582}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 33.28608238999732}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 33.33789288206026}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 33.41487950505689}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 36.661399364005774}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 30.22641655593179}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.96496795094572}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 34.8490983331576}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.375705623999238}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07400986053686}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.870226074010134}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62043892712619}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.235010980977677}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 16.02295208198484}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.63899702391797}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.927992683951743}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 86.84616693145111}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 16.204893412068486}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77654280449566}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 16.045786170987412}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.8394883510191}, "opt_eval_metrics": {"exact_match": 78.9593188268685, "f1": 86.71766917125102}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.605650334036909}, "opt_eval_metrics": {"exact_match": 78.69441816461683, "f1": 86.58409293332078}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 20.12763703102246}, "opt_eval_metrics": {"exact_match": 79.94323557237465, "f1": 87.52956877579788}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 69.66989313997328, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-45000": {"stats": {"total": 108893186, "nnz": 52655769, "linear_total": 84934656, "linear_nnz": 28740096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3704832, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 2131968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818560, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2327040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3674112, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2494464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592704, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2314752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2942976, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844672, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738752, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665024, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 878592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 402432}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1440768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 949248}}, "total_sparsity": 51.64456938563631, "linear_sparsity": 66.162109375}, "speed": {"eval_elapsed_time": 15.83343747886829}, "opt_eval_metrics": {"exact_match": 78.1929990539262, "f1": 85.92206431273945}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 47478801, "linear_total": 84934656, "linear_nnz": 23566848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2959872, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1485312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2121216, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1728000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3085824, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1906176}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1806336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422272, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1734144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1712640}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1288704}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494528, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 708096}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 772608}}, "total_sparsity": 56.39874013788153, "linear_sparsity": 72.2529658564815}, "speed": {"eval_elapsed_time": 14.195255008991808}, "opt_eval_metrics": {"exact_match": 77.69157994323557, "f1": 85.75507572992562}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.292132368078455}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 71.46695366402855, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 76185762, "linear_total": 84934656, "linear_nnz": 52244480, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4866048, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 4083712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4853760, "linear_attention_total": 2359296, "linear_attention_nnz": 658432, "linear_dense_total": 4718592, "linear_dense_nnz": 4195328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5263360, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4259840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5614592, "linear_attention_total": 2359296, "linear_attention_nnz": 1281024, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5713920, "linear_attention_total": 2359296, "linear_attention_nnz": 1423360, "linear_dense_total": 4718592, "linear_dense_nnz": 4290560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5517312, "linear_attention_total": 2359296, "linear_attention_nnz": 1231872, "linear_dense_total": 4718592, "linear_dense_nnz": 4285440}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5399552, "linear_attention_total": 2359296, "linear_attention_nnz": 1243136, "linear_dense_total": 4718592, "linear_dense_nnz": 4156416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4904960, "linear_attention_total": 2359296, "linear_attention_nnz": 975872, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4467712, "linear_attention_total": 2359296, "linear_attention_nnz": 971776, "linear_dense_total": 4718592, "linear_dense_nnz": 3495936}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535424, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 1886208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1672192, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 1161216}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1435648, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 1113088}}, "total_sparsity": 30.036244875781303, "linear_sparsity": 38.488618827160494}, "speed": {"eval_elapsed_time": 30.923503675032407}, "opt_eval_metrics": {"exact_match": 79.97161778618732, "f1": 87.71774903593533}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 20.066182799171656}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 19.653059495147318}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-50000": {"stats": {"total": 108893186, "nnz": 40770050, "linear_total": 84934656, "linear_nnz": 16811520, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190336, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 855552}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2171904, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1900032, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717248, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1029120}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1319424, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 729600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030656, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 637440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1179648, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 393216}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 715776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 224256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}}, "total_sparsity": 62.55959486757969, "linear_sparsity": 80.20652488425925}, "speed": {"eval_elapsed_time": 12.685803183936514}, "opt_eval_metrics": {"exact_match": 75.79943235572375, "f1": 84.3797785815339}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.091999777941965}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.029475754010491}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.409963917918503}, "opt_eval_metrics": {"exact_match": 77.93755912961211, "f1": 86.0611894864831}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 19.07873147400096}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.581735570915043}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-25000": {"stats": {"total": 108893186, "nnz": 97257474, "linear_total": 84934656, "linear_nnz": 73298944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6452736, "linear_attention_total": 2359296, "linear_attention_nnz": 1889280, "linear_dense_total": 4718592, "linear_dense_nnz": 4563456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6338048, "linear_attention_total": 2359296, "linear_attention_nnz": 1839104, "linear_dense_total": 4718592, "linear_dense_nnz": 4498944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6545920, "linear_attention_total": 2359296, "linear_attention_nnz": 2037760, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6564864, "linear_attention_total": 2359296, "linear_attention_nnz": 2056704, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6459904, "linear_attention_total": 2359296, "linear_attention_nnz": 2000896, "linear_dense_total": 4718592, "linear_dense_nnz": 4459008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6377728, "linear_attention_total": 2359296, "linear_attention_nnz": 1963264, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6199808, "linear_attention_total": 2359296, "linear_attention_nnz": 1899008, "linear_dense_total": 4718592, "linear_dense_nnz": 4300800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6070272, "linear_attention_total": 2359296, "linear_attention_nnz": 1938432, "linear_dense_total": 4718592, "linear_dense_nnz": 4131840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5774848, "linear_attention_total": 2359296, "linear_attention_nnz": 1905664, "linear_dense_total": 4718592, "linear_dense_nnz": 3869184}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5543168, "linear_attention_total": 2359296, "linear_attention_nnz": 1779968, "linear_dense_total": 4718592, "linear_dense_nnz": 3763200}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5265920, "linear_attention_total": 2359296, "linear_attention_nnz": 1662464, "linear_dense_total": 4718592, "linear_dense_nnz": 3603456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5705728, "linear_attention_total": 2359296, "linear_attention_nnz": 1722880, "linear_dense_total": 4718592, "linear_dense_nnz": 3982848}}, "total_sparsity": 10.685436276976967, "linear_sparsity": 13.699604552469136}, "speed": {"eval_elapsed_time": 39.34595324099064}, "opt_eval_metrics": {"exact_match": 70.2554399243141, "f1": 79.54398424308184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 28.695044982945547}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 28.08546430291608}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 27.40028947405517}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 25.00334911304526}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 24.69693502294831}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 24.108074960997328}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 44512539, "linear_total": 84934656, "linear_nnz": 20599296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869312, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1254912}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2022400, "linear_attention_total": 2359296, "linear_attention_nnz": 612352, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2259968, "linear_attention_total": 2359296, "linear_attention_nnz": 699392, "linear_dense_total": 4718592, "linear_dense_nnz": 1560576}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 1010688, "linear_dense_total": 4718592, "linear_dense_nnz": 1532928}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2463744, "linear_attention_total": 2359296, "linear_attention_nnz": 927744, "linear_dense_total": 4718592, "linear_dense_nnz": 1536000}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2345472, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 1473024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2036224, "linear_attention_total": 2359296, "linear_attention_nnz": 867328, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 897024, "linear_dense_total": 4718592, "linear_dense_nnz": 903168}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165824, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734208, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576512, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 261120, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}}, "total_sparsity": 59.12275080279128, "linear_sparsity": 75.7468894675926}, "speed": {"eval_elapsed_time": 17.345293765887618}, "opt_eval_metrics": {"exact_match": 79.36613055818354, "f1": 87.31339978481493}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 15.926922732032835}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.737465491052717}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.68077167298179}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.07646648866317}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.309701333986595}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.7418554019491}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.103150750976056}, "opt_eval_metrics": {"exact_match": 78.92147587511826, "f1": 86.74888507219117}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.77731288096402}, "opt_eval_metrics": {"exact_match": 78.76064333017976, "f1": 86.70283536757672}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.746100773918442}, "opt_eval_metrics": {"exact_match": 78.85525070955535, "f1": 86.78368120366805}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.66016855603084}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.2053101999918}, "opt_eval_metrics": {"exact_match": 73.92620624408704, "f1": 83.01994135540168}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.106899423059076}, "opt_eval_metrics": {"exact_match": 73.48155156102176, "f1": 82.77426887329388}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.171043560025282}, "opt_eval_metrics": {"exact_match": 73.04635761589404, "f1": 82.29210924509454}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 13.23735156096518}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.734316703979857}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 38322466, "linear_total": 84934656, "linear_nnz": 14411776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015808, "linear_attention_total": 2359296, "linear_attention_nnz": 498688, "linear_dense_total": 4718592, "linear_dense_nnz": 517120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1176576, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 665600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846272, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1137664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953792, "linear_attention_total": 2359296, "linear_attention_nnz": 832512, "linear_dense_total": 4718592, "linear_dense_nnz": 1121280}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854464, "linear_attention_total": 2359296, "linear_attention_nnz": 739328, "linear_dense_total": 4718592, "linear_dense_nnz": 1115136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754112, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1177600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522688, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 920576}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1283072, "linear_attention_total": 2359296, "linear_attention_nnz": 728064, "linear_dense_total": 4718592, "linear_dense_nnz": 555008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 641024, "linear_attention_total": 2359296, "linear_attention_nnz": 465920, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496640, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 185344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 199680, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}}, "total_sparsity": 64.80728739078312, "linear_sparsity": 83.03192515432099}, "speed": {"eval_elapsed_time": 19.318465403979644}, "opt_eval_metrics": {"exact_match": 75.54399243140965, "f1": 84.18974712714544}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 37814786, "linear_total": 84934656, "linear_nnz": 13904896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 974848, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1168384, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829888, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1119232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906688, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 1054720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1058816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684480, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 1128448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1434624, "linear_attention_total": 2359296, "linear_attention_nnz": 569344, "linear_dense_total": 4718592, "linear_dense_nnz": 865280}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205248, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 635904, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 347136, "linear_dense_total": 4718592, "linear_dense_nnz": 82944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 189440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 191488}}, "total_sparsity": 65.2735057269791, "linear_sparsity": 83.62871334876543}, "speed": {"eval_elapsed_time": 19.066289516864344}, "opt_eval_metrics": {"exact_match": 75.44938505203406, "f1": 84.0707510238674}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36247394, "linear_total": 84934656, "linear_nnz": 12339200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 404480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1039360, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 550912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1001472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1664000, "linear_attention_total": 2359296, "linear_attention_nnz": 775168, "linear_dense_total": 4718592, "linear_dense_nnz": 888832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1506304, "linear_attention_total": 2359296, "linear_attention_nnz": 564224, "linear_dense_total": 4718592, "linear_dense_nnz": 942080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1456128, "linear_attention_total": 2359296, "linear_attention_nnz": 494592, "linear_dense_total": 4718592, "linear_dense_nnz": 961536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1302528, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 790528}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 419840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 414720, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393216, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 344064, "linear_attention_total": 2359296, "linear_attention_nnz": 153600, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}}, "total_sparsity": 66.71289055680674, "linear_sparsity": 85.47212577160494}, "speed": {"eval_elapsed_time": 18.469548488035798}, "opt_eval_metrics": {"exact_match": 75.57237464522233, "f1": 84.02544962299854}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.27580028004013}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.814438845962286}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.804757428006269}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.842320216004737}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 14.883674454060383}, "opt_eval_metrics": {"exact_match": 76.43330179754021, "f1": 84.92125512821515}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.104866776964627}, "opt_eval_metrics": {"exact_match": 76.3670766319773, "f1": 84.90500621616839}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50669453, "linear_total": 84934656, "linear_nnz": 26755584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3454464, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1979904}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2108928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3497472, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2317824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2995200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 2110464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2832384, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2528256, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2036736}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1631232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1691136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684992, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 374784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1082880, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}}, "total_sparsity": 53.46866515596302, "linear_sparsity": 68.4986255787037}, "speed": {"eval_elapsed_time": 15.358230478945188}, "opt_eval_metrics": {"exact_match": 78.12677388836329, "f1": 86.09062317714458}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.718609332921915}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.616545491036959}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.292588334996253}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 68.15529748401605, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.569498455966823}, "opt_eval_metrics": {"exact_match": 76.12109744560075, "f1": 84.59321000252827}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 12.87935333198402}, "opt_eval_metrics": {"exact_match": 76.2251655629139, "f1": 84.80214537282716}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.830008602933958}, "opt_eval_metrics": {"exact_match": 75.68590350047303, "f1": 84.47747389903205}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.832387157017365}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.47498508158148}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.66309662302956}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50793088999642}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 21.401531255804002}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 21.01131779095158}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 20.67874938994646}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 20.635029988130555}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 25.327169548952952}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 55275810, "linear_total": 84934656, "linear_nnz": 31358976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4016640, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 2542080}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340800, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2652672}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4039680, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2860032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4041216, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2763264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3187200, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2499072}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 2477568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2580480, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1990656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815552, "linear_attention_total": 2359296, "linear_attention_nnz": 245760, "linear_dense_total": 4718592, "linear_dense_nnz": 1569792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1893888, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 714240, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 517632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1577472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}}, "total_sparsity": 49.23850423478289, "linear_sparsity": 63.078703703703695}, "speed": {"eval_elapsed_time": 16.857338295085356}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.01032921346379}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.838669790071435}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50726818, "linear_total": 84934656, "linear_nnz": 26803200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2117632, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 1359872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1583104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3328000, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 2427904}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3734528, "linear_attention_total": 2359296, "linear_attention_nnz": 1215488, "linear_dense_total": 4718592, "linear_dense_nnz": 2519040}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3650560, "linear_attention_total": 2359296, "linear_attention_nnz": 1223680, "linear_dense_total": 4718592, "linear_dense_nnz": 2426880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3445760, "linear_attention_total": 2359296, "linear_attention_nnz": 1035264, "linear_dense_total": 4718592, "linear_dense_nnz": 2410496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2830336, "linear_attention_total": 2359296, "linear_attention_nnz": 1011712, "linear_dense_total": 4718592, "linear_dense_nnz": 1818624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2111488, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1195008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1225728, "linear_attention_total": 2359296, "linear_attention_nnz": 860160, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765952, "linear_attention_total": 2359296, "linear_attention_nnz": 603136, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 745472, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 302080, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}}, "total_sparsity": 53.41598509203321, "linear_sparsity": 68.4425636574074}, "speed": {"eval_elapsed_time": 25.054876235080883}, "opt_eval_metrics": {"exact_match": 77.64427625354777, "f1": 85.9245488273656}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 22.711076447973028}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 22.222527293022722}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.605739500955679}, "opt_eval_metrics": {"exact_match": 78.01324503311258, "f1": 85.85711399770457}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.334042510017753}, "opt_eval_metrics": {"exact_match": 78.02270577105014, "f1": 85.8869692285446}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 70240546, "linear_total": 84934656, "linear_nnz": 46302208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4364288, "linear_attention_total": 2359296, "linear_attention_nnz": 770048, "linear_dense_total": 4718592, "linear_dense_nnz": 3594240}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4529152, "linear_attention_total": 2359296, "linear_attention_nnz": 724992, "linear_dense_total": 4718592, "linear_dense_nnz": 3804160}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5151744, "linear_attention_total": 2359296, "linear_attention_nnz": 1142784, "linear_dense_total": 4718592, "linear_dense_nnz": 4008960}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5341184, "linear_attention_total": 2359296, "linear_attention_nnz": 1333248, "linear_dense_total": 4718592, "linear_dense_nnz": 4007936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5542912, "linear_attention_total": 2359296, "linear_attention_nnz": 1481728, "linear_dense_total": 4718592, "linear_dense_nnz": 4061184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5168128, "linear_attention_total": 2359296, "linear_attention_nnz": 1220608, "linear_dense_total": 4718592, "linear_dense_nnz": 3947520}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5190656, "linear_attention_total": 2359296, "linear_attention_nnz": 1311744, "linear_dense_total": 4718592, "linear_dense_nnz": 3878912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4362240, "linear_attention_total": 2359296, "linear_attention_nnz": 1070080, "linear_dense_total": 4718592, "linear_dense_nnz": 3292160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2987008, "linear_attention_total": 2359296, "linear_attention_nnz": 1006592, "linear_dense_total": 4718592, "linear_dense_nnz": 1980416}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 684032, "linear_dense_total": 4718592, "linear_dense_nnz": 753664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193984, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 622592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033216, "linear_attention_total": 2359296, "linear_attention_nnz": 368640, "linear_dense_total": 4718592, "linear_dense_nnz": 664576}}, "total_sparsity": 35.49592166400568, "linear_sparsity": 45.48490547839506}, "speed": {"eval_elapsed_time": 30.100851407973096}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 87.97635235966065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 29.698436830891296}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 81.4040583850001, "optimize_mode": "block_sparse"}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 12.247032377053984}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06571558378387}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 12.096938933013007}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 24.926402524928562}, "opt_eval_metrics": {"exact_match": 78.29706717123936, "f1": 86.2648683969933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.562090583960526}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.720699563040398}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.28474303662432}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.966550998971798}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.29050695680083}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.872020053910092}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 13.847230941057205}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 13.841004910878837}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.929598903981969}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}, "opt_eval_metrics": null}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 30.27796263305936}, "opt_eval_metrics": {"exact_match": 79.40397350993378, "f1": 86.95662988564573}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 20.700779567006975}, "opt_eval_metrics": {"exact_match": 79.13907284768212, "f1": 86.92362610004827}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.624390312936157}, "opt_eval_metrics": {"exact_match": 80.5771050141911, "f1": 88.02575212811699}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.420848372974433}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.48749394175648}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.429447512025945}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.997670065960847}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.94475047733708}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.94853257900104}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97983371588884}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results9.json b/analysis/files/results/results9.json deleted file mode 100644 index 00d0d833..00000000 --- a/analysis/files/results/results9.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 60368184, "linear_total": 84934656, "linear_nnz": 36440832, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3788032, "linear_attention_total": 2359296, "linear_attention_nnz": 1170688, "linear_dense_total": 4718592, "linear_dense_nnz": 2617344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3460352, "linear_attention_total": 2359296, "linear_attention_nnz": 956672, "linear_dense_total": 4718592, "linear_dense_nnz": 2503680}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003072, "linear_attention_total": 2359296, "linear_attention_nnz": 1288960, "linear_dense_total": 4718592, "linear_dense_nnz": 2714112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4128768, "linear_attention_total": 2359296, "linear_attention_nnz": 1483776, "linear_dense_total": 4718592, "linear_dense_nnz": 2644992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4293120, "linear_attention_total": 2359296, "linear_attention_nnz": 1712640, "linear_dense_total": 4718592, "linear_dense_nnz": 2580480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4090112, "linear_attention_total": 2359296, "linear_attention_nnz": 1598720, "linear_dense_total": 4718592, "linear_dense_nnz": 2491392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1647872, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870272, "linear_attention_total": 2359296, "linear_attention_nnz": 1278976, "linear_dense_total": 4718592, "linear_dense_nnz": 1591296}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2240256, "linear_attention_total": 2359296, "linear_attention_nnz": 1321728, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 826112, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011456, "linear_attention_total": 2359296, "linear_attention_nnz": 676608, "linear_dense_total": 4718592, "linear_dense_nnz": 334848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562368, "linear_attention_total": 2359296, "linear_attention_nnz": 473344, "linear_dense_total": 4718592, "linear_dense_nnz": 1089024}}, "total_sparsity": 44.56201878416892, "linear_sparsity": 57.095449942129626}, "speed": {"eval_elapsed_time": 25.83545230398886}, "opt_eval_metrics": {"exact_match": 81.05960264900662, "f1": 88.35100701142292}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53990689, "linear_total": 84934656, "linear_nnz": 30067968, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2828544, "linear_attention_total": 2359296, "linear_attention_nnz": 880896, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2819840, "linear_attention_total": 2359296, "linear_attention_nnz": 849152, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3367424, "linear_attention_total": 2359296, "linear_attention_nnz": 1169408, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3521280, "linear_attention_total": 2359296, "linear_attention_nnz": 1352448, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693824, "linear_attention_total": 2359296, "linear_attention_nnz": 1524992, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3554560, "linear_attention_total": 2359296, "linear_attention_nnz": 1511680, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2935296, "linear_attention_total": 2359296, "linear_attention_nnz": 1336320, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2452992, "linear_attention_total": 2359296, "linear_attention_nnz": 1178112, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1836032, "linear_attention_total": 2359296, "linear_attention_nnz": 1134080, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222144, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41867082482094, "linear_sparsity": 64.59870515046296}, "speed": {"eval_elapsed_time": 23.56436571292579}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.35425478567389}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 53994017, "linear_total": 84934656, "linear_nnz": 30071296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2825984, "linear_attention_total": 2359296, "linear_attention_nnz": 878336, "linear_dense_total": 4718592, "linear_dense_nnz": 1947648}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2823424, "linear_attention_total": 2359296, "linear_attention_nnz": 852736, "linear_dense_total": 4718592, "linear_dense_nnz": 1970688}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3366400, "linear_attention_total": 2359296, "linear_attention_nnz": 1168384, "linear_dense_total": 4718592, "linear_dense_nnz": 2198016}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3529216, "linear_attention_total": 2359296, "linear_attention_nnz": 1360384, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3694080, "linear_attention_total": 2359296, "linear_attention_nnz": 1525248, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3562240, "linear_attention_total": 2359296, "linear_attention_nnz": 1519360, "linear_dense_total": 4718592, "linear_dense_nnz": 2042880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2944768, "linear_attention_total": 2359296, "linear_attention_nnz": 1345792, "linear_dense_total": 4718592, "linear_dense_nnz": 1598976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2450176, "linear_attention_total": 2359296, "linear_attention_nnz": 1175296, "linear_dense_total": 4718592, "linear_dense_nnz": 1274880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828864, "linear_attention_total": 2359296, "linear_attention_nnz": 1126912, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023488, "linear_attention_total": 2359296, "linear_attention_nnz": 702464, "linear_dense_total": 4718592, "linear_dense_nnz": 321024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 812032, "linear_attention_total": 2359296, "linear_attention_nnz": 583168, "linear_dense_total": 4718592, "linear_dense_nnz": 228864}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210624, "linear_attention_total": 2359296, "linear_attention_nnz": 385792, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}}, "total_sparsity": 50.41561461889819, "linear_sparsity": 64.5947868441358}, "speed": {"eval_elapsed_time": 23.61654355400242}, "opt_eval_metrics": {"exact_match": 81.11636707663197, "f1": 88.26635621180897}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 49113499, "linear_total": 84934656, "linear_nnz": 25174883, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2532837, "linear_attention_total": 2359296, "linear_attention_nnz": 278464, "linear_dense_total": 4718592, "linear_dense_nnz": 2254373}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724403, "linear_attention_total": 2359296, "linear_attention_nnz": 411200, "linear_dense_total": 4718592, "linear_dense_nnz": 2313203}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2772181, "linear_attention_total": 2359296, "linear_attention_nnz": 388544, "linear_dense_total": 4718592, "linear_dense_nnz": 2383637}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2962889, "linear_attention_total": 2359296, "linear_attention_nnz": 616064, "linear_dense_total": 4718592, "linear_dense_nnz": 2346825}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2756799, "linear_attention_total": 2359296, "linear_attention_nnz": 475392, "linear_dense_total": 4718592, "linear_dense_nnz": 2281407}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2741284, "linear_attention_total": 2359296, "linear_attention_nnz": 485760, "linear_dense_total": 4718592, "linear_dense_nnz": 2255524}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2526246, "linear_attention_total": 2359296, "linear_attention_nnz": 436416, "linear_dense_total": 4718592, "linear_dense_nnz": 2089830}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233017, "linear_attention_total": 2359296, "linear_attention_nnz": 473664, "linear_dense_total": 4718592, "linear_dense_nnz": 1759353}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652692, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 1360596}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1056535, "linear_attention_total": 2359296, "linear_attention_nnz": 260864, "linear_dense_total": 4718592, "linear_dense_nnz": 795671}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 795434, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 587562}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 420566, "linear_attention_total": 2359296, "linear_attention_nnz": 115648, "linear_dense_total": 4718592, "linear_dense_nnz": 304918}}, "total_sparsity": 54.89754611459343, "linear_sparsity": 70.35970452391072}, "speed": {"eval_elapsed_time": 29.429046569159254}, "opt_eval_metrics": {"exact_match": 80.15137180700094, "f1": 87.62280270760408}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 50398933, "linear_total": 84934656, "linear_nnz": 26460853, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673633, "linear_attention_total": 2359296, "linear_attention_nnz": 341248, "linear_dense_total": 4718592, "linear_dense_nnz": 2332385}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850180, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 2387588}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871056, "linear_attention_total": 2359296, "linear_attention_nnz": 412672, "linear_dense_total": 4718592, "linear_dense_nnz": 2458384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114067, "linear_attention_total": 2359296, "linear_attention_nnz": 692736, "linear_dense_total": 4718592, "linear_dense_nnz": 2421331}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853594, "linear_attention_total": 2359296, "linear_attention_nnz": 505088, "linear_dense_total": 4718592, "linear_dense_nnz": 2348506}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2871518, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 2322654}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2608144, "linear_attention_total": 2359296, "linear_attention_nnz": 469504, "linear_dense_total": 4718592, "linear_dense_nnz": 2138640}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382631, "linear_attention_total": 2359296, "linear_attention_nnz": 552448, "linear_dense_total": 4718592, "linear_dense_nnz": 1830183}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757175, "linear_attention_total": 2359296, "linear_attention_nnz": 316672, "linear_dense_total": 4718592, "linear_dense_nnz": 1440503}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1151305, "linear_attention_total": 2359296, "linear_attention_nnz": 292096, "linear_dense_total": 4718592, "linear_dense_nnz": 859209}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873504, "linear_attention_total": 2359296, "linear_attention_nnz": 227328, "linear_dense_total": 4718592, "linear_dense_nnz": 646176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 454046, "linear_attention_total": 2359296, "linear_attention_nnz": 128000, "linear_dense_total": 4718592, "linear_dense_nnz": 326046}}, "total_sparsity": 53.71709208691902, "linear_sparsity": 68.84563469592435}, "speed": {"eval_elapsed_time": 28.692298884037882}, "opt_eval_metrics": {"exact_match": 79.92431409649953, "f1": 87.57193515884181}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 50358753, "linear_total": 84934656, "linear_nnz": 26420688, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2668105, "linear_attention_total": 2359296, "linear_attention_nnz": 335872, "linear_dense_total": 4718592, "linear_dense_nnz": 2332233}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2839080, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 2387496}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2866908, "linear_attention_total": 2359296, "linear_attention_nnz": 408576, "linear_dense_total": 4718592, "linear_dense_nnz": 2458332}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3103682, "linear_attention_total": 2359296, "linear_attention_nnz": 682496, "linear_dense_total": 4718592, "linear_dense_nnz": 2421186}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2853238, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 2348406}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2880784, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 2322448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614378, "linear_attention_total": 2359296, "linear_attention_nnz": 475904, "linear_dense_total": 4718592, "linear_dense_nnz": 2138474}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2372808, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 1830088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1752978, "linear_attention_total": 2359296, "linear_attention_nnz": 312576, "linear_dense_total": 4718592, "linear_dense_nnz": 1440402}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147129, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 859129}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870927, "linear_attention_total": 2359296, "linear_attention_nnz": 224768, "linear_dense_total": 4718592, "linear_dense_nnz": 646159}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 450671, "linear_attention_total": 2359296, "linear_attention_nnz": 124672, "linear_dense_total": 4718592, "linear_dense_nnz": 325999}}, "total_sparsity": 53.75399063078199, "linear_sparsity": 68.89292399088542}, "speed": {"eval_elapsed_time": 28.704244010150433}, "opt_eval_metrics": {"exact_match": 80.02838221381268, "f1": 87.5280353923367}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 58360680, "linear_total": 84934656, "linear_nnz": 34416900, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3232823, "linear_attention_total": 2359296, "linear_attention_nnz": 405824, "linear_dense_total": 4718592, "linear_dense_nnz": 2826999}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3412647, "linear_attention_total": 2359296, "linear_attention_nnz": 543872, "linear_dense_total": 4718592, "linear_dense_nnz": 2868775}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3556851, "linear_attention_total": 2359296, "linear_attention_nnz": 613248, "linear_dense_total": 4718592, "linear_dense_nnz": 2943603}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708198, "linear_attention_total": 2359296, "linear_attention_nnz": 791424, "linear_dense_total": 4718592, "linear_dense_nnz": 2916774}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3695959, "linear_attention_total": 2359296, "linear_attention_nnz": 819072, "linear_dense_total": 4718592, "linear_dense_nnz": 2876887}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644598, "linear_attention_total": 2359296, "linear_attention_nnz": 788928, "linear_dense_total": 4718592, "linear_dense_nnz": 2855670}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486486, "linear_attention_total": 2359296, "linear_attention_nnz": 761600, "linear_dense_total": 4718592, "linear_dense_nnz": 2724886}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114460, "linear_attention_total": 2359296, "linear_attention_nnz": 686464, "linear_dense_total": 4718592, "linear_dense_nnz": 2427996}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2616038, "linear_attention_total": 2359296, "linear_attention_nnz": 602496, "linear_dense_total": 4718592, "linear_dense_nnz": 2013542}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1775741, "linear_attention_total": 2359296, "linear_attention_nnz": 381632, "linear_dense_total": 4718592, "linear_dense_nnz": 1394109}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1407393, "linear_attention_total": 2359296, "linear_attention_nnz": 325760, "linear_dense_total": 4718592, "linear_dense_nnz": 1081633}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765706, "linear_attention_total": 2359296, "linear_attention_nnz": 174016, "linear_dense_total": 4718592, "linear_dense_nnz": 591690}}, "total_sparsity": 46.405572153982156, "linear_sparsity": 59.47837829589844}, "speed": {"eval_elapsed_time": 33.08102096617222}, "opt_eval_metrics": {"exact_match": 81.00283822138127, "f1": 88.2671108560581}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 58344499, "linear_total": 84934656, "linear_nnz": 34400721, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3231632, "linear_attention_total": 2359296, "linear_attention_nnz": 404736, "linear_dense_total": 4718592, "linear_dense_nnz": 2826896}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3411716, "linear_attention_total": 2359296, "linear_attention_nnz": 543040, "linear_dense_total": 4718592, "linear_dense_nnz": 2868676}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3557965, "linear_attention_total": 2359296, "linear_attention_nnz": 614464, "linear_dense_total": 4718592, "linear_dense_nnz": 2943501}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3706774, "linear_attention_total": 2359296, "linear_attention_nnz": 790144, "linear_dense_total": 4718592, "linear_dense_nnz": 2916630}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3693580, "linear_attention_total": 2359296, "linear_attention_nnz": 816832, "linear_dense_total": 4718592, "linear_dense_nnz": 2876748}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3641505, "linear_attention_total": 2359296, "linear_attention_nnz": 785920, "linear_dense_total": 4718592, "linear_dense_nnz": 2855585}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3484162, "linear_attention_total": 2359296, "linear_attention_nnz": 759424, "linear_dense_total": 4718592, "linear_dense_nnz": 2724738}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3114894, "linear_attention_total": 2359296, "linear_attention_nnz": 687040, "linear_dense_total": 4718592, "linear_dense_nnz": 2427854}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2617066, "linear_attention_total": 2359296, "linear_attention_nnz": 603648, "linear_dense_total": 4718592, "linear_dense_nnz": 2013418}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1773359, "linear_attention_total": 2359296, "linear_attention_nnz": 379328, "linear_dense_total": 4718592, "linear_dense_nnz": 1394031}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404175, "linear_attention_total": 2359296, "linear_attention_nnz": 322624, "linear_dense_total": 4718592, "linear_dense_nnz": 1081551}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763893, "linear_attention_total": 2359296, "linear_attention_nnz": 172288, "linear_dense_total": 4718592, "linear_dense_nnz": 591605}}, "total_sparsity": 46.42043166961797, "linear_sparsity": 59.49742705733687}, "speed": {"eval_elapsed_time": 33.090760480146855}, "opt_eval_metrics": {"exact_match": 81.01229895931883, "f1": 88.16022239737082}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 59478503, "linear_total": 84934656, "linear_nnz": 35536574, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3362923, "linear_attention_total": 2359296, "linear_attention_nnz": 466432, "linear_dense_total": 4718592, "linear_dense_nnz": 2896491}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3511822, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2933262}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3642442, "linear_attention_total": 2359296, "linear_attention_nnz": 636672, "linear_dense_total": 4718592, "linear_dense_nnz": 3005770}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843335, "linear_attention_total": 2359296, "linear_attention_nnz": 857344, "linear_dense_total": 4718592, "linear_dense_nnz": 2985991}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3768311, "linear_attention_total": 2359296, "linear_attention_nnz": 829184, "linear_dense_total": 4718592, "linear_dense_nnz": 2939127}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3670401, "linear_attention_total": 2359296, "linear_attention_nnz": 754432, "linear_dense_total": 4718592, "linear_dense_nnz": 2915969}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555086, "linear_attention_total": 2359296, "linear_attention_nnz": 767488, "linear_dense_total": 4718592, "linear_dense_nnz": 2787598}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250234, "linear_attention_total": 2359296, "linear_attention_nnz": 752640, "linear_dense_total": 4718592, "linear_dense_nnz": 2497594}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2669249, "linear_attention_total": 2359296, "linear_attention_nnz": 553472, "linear_dense_total": 4718592, "linear_dense_nnz": 2115777}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903656, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 1490216}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522534, "linear_attention_total": 2359296, "linear_attention_nnz": 353792, "linear_dense_total": 4718592, "linear_dense_nnz": 1168742}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836581, "linear_attention_total": 2359296, "linear_attention_nnz": 204032, "linear_dense_total": 4718592, "linear_dense_nnz": 632549}}, "total_sparsity": 45.379040521415185, "linear_sparsity": 58.160101337197375}, "speed": {"eval_elapsed_time": 30.383016001898795}, "opt_eval_metrics": {"exact_match": 80.93661305581836, "f1": 88.29241912882233}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 59470230, "linear_total": 84934656, "linear_nnz": 35528301, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3365714, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 2896466}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3508110, "linear_attention_total": 2359296, "linear_attention_nnz": 574976, "linear_dense_total": 4718592, "linear_dense_nnz": 2933134}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3640290, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 3005666}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3837370, "linear_attention_total": 2359296, "linear_attention_nnz": 851456, "linear_dense_total": 4718592, "linear_dense_nnz": 2985914}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3769702, "linear_attention_total": 2359296, "linear_attention_nnz": 830720, "linear_dense_total": 4718592, "linear_dense_nnz": 2938982}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3672353, "linear_attention_total": 2359296, "linear_attention_nnz": 756480, "linear_dense_total": 4718592, "linear_dense_nnz": 2915873}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3555719, "linear_attention_total": 2359296, "linear_attention_nnz": 768256, "linear_dense_total": 4718592, "linear_dense_nnz": 2787463}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3250893, "linear_attention_total": 2359296, "linear_attention_nnz": 753408, "linear_dense_total": 4718592, "linear_dense_nnz": 2497485}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666597, "linear_attention_total": 2359296, "linear_attention_nnz": 550912, "linear_dense_total": 4718592, "linear_dense_nnz": 2115685}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1903316, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 1490132}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1524282, "linear_attention_total": 2359296, "linear_attention_nnz": 355584, "linear_dense_total": 4718592, "linear_dense_nnz": 1168698}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 833955, "linear_attention_total": 2359296, "linear_attention_nnz": 201472, "linear_dense_total": 4718592, "linear_dense_nnz": 632483}}, "total_sparsity": 45.38663787466004, "linear_sparsity": 58.16984176635742}, "speed": {"eval_elapsed_time": 30.506126267835498}, "opt_eval_metrics": {"exact_match": 80.77578051087986, "f1": 88.22778160568927}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41069735, "linear_total": 84934656, "linear_nnz": 17134148, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825423, "linear_attention_total": 2359296, "linear_attention_nnz": 185152, "linear_dense_total": 4718592, "linear_dense_nnz": 1640271}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2011232, "linear_attention_total": 2359296, "linear_attention_nnz": 309376, "linear_dense_total": 4718592, "linear_dense_nnz": 1701856}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2013521, "linear_attention_total": 2359296, "linear_attention_nnz": 266368, "linear_dense_total": 4718592, "linear_dense_nnz": 1747153}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151481, "linear_attention_total": 2359296, "linear_attention_nnz": 452288, "linear_dense_total": 4718592, "linear_dense_nnz": 1699193}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1937929, "linear_attention_total": 2359296, "linear_attention_nnz": 315584, "linear_dense_total": 4718592, "linear_dense_nnz": 1622345}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906344, "linear_attention_total": 2359296, "linear_attention_nnz": 324160, "linear_dense_total": 4718592, "linear_dense_nnz": 1582184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660767, "linear_attention_total": 2359296, "linear_attention_nnz": 264448, "linear_dense_total": 4718592, "linear_dense_nnz": 1396319}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1418922, "linear_attention_total": 2359296, "linear_attention_nnz": 312704, "linear_dense_total": 4718592, "linear_dense_nnz": 1106218}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 973188, "linear_attention_total": 2359296, "linear_attention_nnz": 176128, "linear_dense_total": 4718592, "linear_dense_nnz": 797060}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574608, "linear_attention_total": 2359296, "linear_attention_nnz": 178368, "linear_dense_total": 4718592, "linear_dense_nnz": 396240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423537, "linear_attention_total": 2359296, "linear_attention_nnz": 140224, "linear_dense_total": 4718592, "linear_dense_nnz": 283313}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 237196, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 154892}}, "total_sparsity": 62.28438480989986, "linear_sparsity": 79.82667051715615}, "speed": {"eval_elapsed_time": 28.065979121020064}, "opt_eval_metrics": {"exact_match": 78.59981078524125, "f1": 86.70965342219107}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40928357, "linear_total": 84934656, "linear_nnz": 16992855, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1811376, "linear_attention_total": 2359296, "linear_attention_nnz": 181120, "linear_dense_total": 4718592, "linear_dense_nnz": 1630256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1999638, "linear_attention_total": 2359296, "linear_attention_nnz": 307392, "linear_dense_total": 4718592, "linear_dense_nnz": 1692246}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004326, "linear_attention_total": 2359296, "linear_attention_nnz": 266880, "linear_dense_total": 4718592, "linear_dense_nnz": 1737446}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132105, "linear_attention_total": 2359296, "linear_attention_nnz": 442304, "linear_dense_total": 4718592, "linear_dense_nnz": 1689801}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922729, "linear_attention_total": 2359296, "linear_attention_nnz": 309632, "linear_dense_total": 4718592, "linear_dense_nnz": 1613097}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886520, "linear_attention_total": 2359296, "linear_attention_nnz": 313664, "linear_dense_total": 4718592, "linear_dense_nnz": 1572856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646649, "linear_attention_total": 2359296, "linear_attention_nnz": 259072, "linear_dense_total": 4718592, "linear_dense_nnz": 1387577}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404939, "linear_attention_total": 2359296, "linear_attention_nnz": 306112, "linear_dense_total": 4718592, "linear_dense_nnz": 1098827}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 964630, "linear_attention_total": 2359296, "linear_attention_nnz": 173184, "linear_dense_total": 4718592, "linear_dense_nnz": 791446}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566118, "linear_attention_total": 2359296, "linear_attention_nnz": 172928, "linear_dense_total": 4718592, "linear_dense_nnz": 393190}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417586, "linear_attention_total": 2359296, "linear_attention_nnz": 136448, "linear_dense_total": 4718592, "linear_dense_nnz": 281138}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 236239, "linear_attention_total": 2359296, "linear_attention_nnz": 82304, "linear_dense_total": 4718592, "linear_dense_nnz": 153935}}, "total_sparsity": 62.414216625088, "linear_sparsity": 79.99302546183267}, "speed": {"eval_elapsed_time": 28.04132828908041}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.80367154149816}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 40912185, "linear_total": 84934656, "linear_nnz": 16976675, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1810859, "linear_attention_total": 2359296, "linear_attention_nnz": 180736, "linear_dense_total": 4718592, "linear_dense_nnz": 1630123}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998023, "linear_attention_total": 2359296, "linear_attention_nnz": 305920, "linear_dense_total": 4718592, "linear_dense_nnz": 1692103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2001199, "linear_attention_total": 2359296, "linear_attention_nnz": 263936, "linear_dense_total": 4718592, "linear_dense_nnz": 1737263}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2132118, "linear_attention_total": 2359296, "linear_attention_nnz": 442496, "linear_dense_total": 4718592, "linear_dense_nnz": 1689622}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1919231, "linear_attention_total": 2359296, "linear_attention_nnz": 306304, "linear_dense_total": 4718592, "linear_dense_nnz": 1612927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1884897, "linear_attention_total": 2359296, "linear_attention_nnz": 312128, "linear_dense_total": 4718592, "linear_dense_nnz": 1572769}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1645758, "linear_attention_total": 2359296, "linear_attention_nnz": 258304, "linear_dense_total": 4718592, "linear_dense_nnz": 1387454}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404565, "linear_attention_total": 2359296, "linear_attention_nnz": 305856, "linear_dense_total": 4718592, "linear_dense_nnz": 1098709}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 963790, "linear_attention_total": 2359296, "linear_attention_nnz": 172480, "linear_dense_total": 4718592, "linear_dense_nnz": 791310}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 564071, "linear_attention_total": 2359296, "linear_attention_nnz": 170944, "linear_dense_total": 4718592, "linear_dense_nnz": 393127}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 416716, "linear_attention_total": 2359296, "linear_attention_nnz": 135616, "linear_dense_total": 4718592, "linear_dense_nnz": 281100}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 235448, "linear_attention_total": 2359296, "linear_attention_nnz": 81536, "linear_dense_total": 4718592, "linear_dense_nnz": 153912}}, "total_sparsity": 62.42906787574385, "linear_sparsity": 80.01207540064682}, "speed": {"eval_elapsed_time": 28.038834661012515}, "opt_eval_metrics": {"exact_match": 78.6092715231788, "f1": 86.70267601348202}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39496838, "linear_total": 84934656, "linear_nnz": 15559744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1647204, "linear_attention_total": 2359296, "linear_attention_nnz": 145232, "linear_dense_total": 4718592, "linear_dense_nnz": 1501972}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842586, "linear_attention_total": 2359296, "linear_attention_nnz": 280192, "linear_dense_total": 4718592, "linear_dense_nnz": 1562394}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841850, "linear_attention_total": 2359296, "linear_attention_nnz": 234064, "linear_dense_total": 4718592, "linear_dense_nnz": 1607786}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1960732, "linear_attention_total": 2359296, "linear_attention_nnz": 386752, "linear_dense_total": 4718592, "linear_dense_nnz": 1573980}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1776766, "linear_attention_total": 2359296, "linear_attention_nnz": 281632, "linear_dense_total": 4718592, "linear_dense_nnz": 1495134}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1744230, "linear_attention_total": 2359296, "linear_attention_nnz": 288320, "linear_dense_total": 4718592, "linear_dense_nnz": 1455910}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518906, "linear_attention_total": 2359296, "linear_attention_nnz": 240864, "linear_dense_total": 4718592, "linear_dense_nnz": 1278042}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1276331, "linear_attention_total": 2359296, "linear_attention_nnz": 275424, "linear_dense_total": 4718592, "linear_dense_nnz": 1000907}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 878990, "linear_attention_total": 2359296, "linear_attention_nnz": 170816, "linear_dense_total": 4718592, "linear_dense_nnz": 708174}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496916, "linear_attention_total": 2359296, "linear_attention_nnz": 165920, "linear_dense_total": 4718592, "linear_dense_nnz": 330996}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360001, "linear_attention_total": 2359296, "linear_attention_nnz": 126288, "linear_dense_total": 4718592, "linear_dense_nnz": 233713}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 215232, "linear_attention_total": 2359296, "linear_attention_nnz": 73824, "linear_dense_total": 4718592, "linear_dense_nnz": 141408}}, "total_sparsity": 63.728825052469304, "linear_sparsity": 81.68033552758487}, "speed": {"eval_elapsed_time": 29.592536952113733}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 87.22039562207584}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39357122, "linear_total": 84934656, "linear_nnz": 15420094, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634237, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1492013}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828361, "linear_attention_total": 2359296, "linear_attention_nnz": 275696, "linear_dense_total": 4718592, "linear_dense_nnz": 1552665}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825967, "linear_attention_total": 2359296, "linear_attention_nnz": 227984, "linear_dense_total": 4718592, "linear_dense_nnz": 1597983}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1943566, "linear_attention_total": 2359296, "linear_attention_nnz": 379616, "linear_dense_total": 4718592, "linear_dense_nnz": 1563950}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761455, "linear_attention_total": 2359296, "linear_attention_nnz": 275824, "linear_dense_total": 4718592, "linear_dense_nnz": 1485631}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1729290, "linear_attention_total": 2359296, "linear_attention_nnz": 282736, "linear_dense_total": 4718592, "linear_dense_nnz": 1446554}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504955, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1269099}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1263225, "linear_attention_total": 2359296, "linear_attention_nnz": 269520, "linear_dense_total": 4718592, "linear_dense_nnz": 993705}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870496, "linear_attention_total": 2359296, "linear_attention_nnz": 167616, "linear_dense_total": 4718592, "linear_dense_nnz": 702880}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489695, "linear_attention_total": 2359296, "linear_attention_nnz": 161552, "linear_dense_total": 4718592, "linear_dense_nnz": 328143}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355803, "linear_attention_total": 2359296, "linear_attention_nnz": 124096, "linear_dense_total": 4718592, "linear_dense_nnz": 231707}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 213044, "linear_attention_total": 2359296, "linear_attention_nnz": 72608, "linear_dense_total": 4718592, "linear_dense_nnz": 140436}}, "total_sparsity": 63.85713060135829, "linear_sparsity": 81.84475604398752}, "speed": {"eval_elapsed_time": 29.660654196050018}, "opt_eval_metrics": {"exact_match": 79.12961210974456, "f1": 87.04337592394437}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 39354055, "linear_total": 84934656, "linear_nnz": 15417031, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1634041, "linear_attention_total": 2359296, "linear_attention_nnz": 142224, "linear_dense_total": 4718592, "linear_dense_nnz": 1491817}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1828346, "linear_attention_total": 2359296, "linear_attention_nnz": 275888, "linear_dense_total": 4718592, "linear_dense_nnz": 1552458}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825560, "linear_attention_total": 2359296, "linear_attention_nnz": 227744, "linear_dense_total": 4718592, "linear_dense_nnz": 1597816}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1942802, "linear_attention_total": 2359296, "linear_attention_nnz": 379008, "linear_dense_total": 4718592, "linear_dense_nnz": 1563794}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1761660, "linear_attention_total": 2359296, "linear_attention_nnz": 276192, "linear_dense_total": 4718592, "linear_dense_nnz": 1485468}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728493, "linear_attention_total": 2359296, "linear_attention_nnz": 282096, "linear_dense_total": 4718592, "linear_dense_nnz": 1446397}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1504843, "linear_attention_total": 2359296, "linear_attention_nnz": 235856, "linear_dense_total": 4718592, "linear_dense_nnz": 1268987}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262994, "linear_attention_total": 2359296, "linear_attention_nnz": 269456, "linear_dense_total": 4718592, "linear_dense_nnz": 993538}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 870263, "linear_attention_total": 2359296, "linear_attention_nnz": 167520, "linear_dense_total": 4718592, "linear_dense_nnz": 702743}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489503, "linear_attention_total": 2359296, "linear_attention_nnz": 161424, "linear_dense_total": 4718592, "linear_dense_nnz": 328079}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 355610, "linear_attention_total": 2359296, "linear_attention_nnz": 123920, "linear_dense_total": 4718592, "linear_dense_nnz": 231690}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 212916, "linear_attention_total": 2359296, "linear_attention_nnz": 72512, "linear_dense_total": 4718592, "linear_dense_nnz": 140404}}, "total_sparsity": 63.859947122862216, "linear_sparsity": 81.84836234575437}, "speed": {"eval_elapsed_time": 29.750202575000003}, "opt_eval_metrics": {"exact_match": 79.09176915799432, "f1": 86.93076968810146}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 56967217, "linear_total": 84934656, "linear_nnz": 33019881, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3067235, "linear_attention_total": 2359296, "linear_attention_nnz": 356016, "linear_dense_total": 4718592, "linear_dense_nnz": 2711219}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3260347, "linear_attention_total": 2359296, "linear_attention_nnz": 506400, "linear_dense_total": 4718592, "linear_dense_nnz": 2753947}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3486676, "linear_attention_total": 2359296, "linear_attention_nnz": 658880, "linear_dense_total": 4718592, "linear_dense_nnz": 2827796}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592390, "linear_attention_total": 2359296, "linear_attention_nnz": 782176, "linear_dense_total": 4718592, "linear_dense_nnz": 2810214}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3644732, "linear_attention_total": 2359296, "linear_attention_nnz": 874272, "linear_dense_total": 4718592, "linear_dense_nnz": 2770460}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3523230, "linear_attention_total": 2359296, "linear_attention_nnz": 772928, "linear_dense_total": 4718592, "linear_dense_nnz": 2750302}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3378315, "linear_attention_total": 2359296, "linear_attention_nnz": 767984, "linear_dense_total": 4718592, "linear_dense_nnz": 2610331}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2983346, "linear_attention_total": 2359296, "linear_attention_nnz": 687968, "linear_dense_total": 4718592, "linear_dense_nnz": 2295378}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465095, "linear_attention_total": 2359296, "linear_attention_nnz": 596368, "linear_dense_total": 4718592, "linear_dense_nnz": 1868727}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1650223, "linear_attention_total": 2359296, "linear_attention_nnz": 404448, "linear_dense_total": 4718592, "linear_dense_nnz": 1245775}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1262562, "linear_attention_total": 2359296, "linear_attention_nnz": 305952, "linear_dense_total": 4718592, "linear_dense_nnz": 956610}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705730, "linear_attention_total": 2359296, "linear_attention_nnz": 172864, "linear_dense_total": 4718592, "linear_dense_nnz": 532866}}, "total_sparsity": 47.6852325727709, "linear_sparsity": 61.12319451791268}, "speed": {"eval_elapsed_time": 35.13715321500786}, "opt_eval_metrics": {"exact_match": 81.3434247871334, "f1": 88.502960365548}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 42014844, "linear_total": 84934656, "linear_nnz": 18080164, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930333, "linear_attention_total": 2359296, "linear_attention_nnz": 211712, "linear_dense_total": 4718592, "linear_dense_nnz": 1718621}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116878, "linear_attention_total": 2359296, "linear_attention_nnz": 345600, "linear_dense_total": 4718592, "linear_dense_nnz": 1771278}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2094823, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 1816807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2266081, "linear_attention_total": 2359296, "linear_attention_nnz": 493312, "linear_dense_total": 4718592, "linear_dense_nnz": 1772769}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1986893, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 1682765}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992507, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 1635131}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1736239, "linear_attention_total": 2359296, "linear_attention_nnz": 278528, "linear_dense_total": 4718592, "linear_dense_nnz": 1457711}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529879, "linear_attention_total": 2359296, "linear_attention_nnz": 355072, "linear_dense_total": 4718592, "linear_dense_nnz": 1174807}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1051417, "linear_attention_total": 2359296, "linear_attention_nnz": 183552, "linear_dense_total": 4718592, "linear_dense_nnz": 867865}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636321, "linear_attention_total": 2359296, "linear_attention_nnz": 196864, "linear_dense_total": 4718592, "linear_dense_nnz": 439457}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 483651, "linear_attention_total": 2359296, "linear_attention_nnz": 157696, "linear_dense_total": 4718592, "linear_dense_nnz": 325955}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 255142, "linear_attention_total": 2359296, "linear_attention_nnz": 90368, "linear_dense_total": 4718592, "linear_dense_nnz": 164774}}, "total_sparsity": 61.41646181607727, "linear_sparsity": 78.7128542676384}, "speed": {"eval_elapsed_time": 27.474724027095363}, "opt_eval_metrics": {"exact_match": 78.86471144749291, "f1": 86.87223379259328}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_0/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39362145, "linear_total": 84934656, "linear_nnz": 15449344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074688, "linear_attention_total": 2359296, "linear_attention_nnz": 796672, "linear_dense_total": 4718592, "linear_dense_nnz": 278016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236224, "linear_attention_total": 2359296, "linear_attention_nnz": 769280, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1635328, "linear_attention_total": 2359296, "linear_attention_nnz": 1056256, "linear_dense_total": 4718592, "linear_dense_nnz": 579072}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 1259264, "linear_dense_total": 4718592, "linear_dense_nnz": 640512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2041856, "linear_attention_total": 2359296, "linear_attention_nnz": 1344512, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860096, "linear_attention_total": 2359296, "linear_attention_nnz": 1244160, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571072, "linear_attention_total": 2359296, "linear_attention_nnz": 1088768, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1299200, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1150464, "linear_attention_total": 2359296, "linear_attention_nnz": 955392, "linear_dense_total": 4718592, "linear_dense_nnz": 195072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 590592, "linear_dense_total": 4718592, "linear_dense_nnz": 78336}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548352, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 72192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 463360, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.85251782420986, "linear_sparsity": 81.81031780478395}, "speed": {"eval_elapsed_time": 18.814206156879663}, "opt_eval_metrics": {"exact_match": 79.33774834437087, "f1": 87.07382313022637}}, "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": {"stats": {"total": 108893186, "nnz": 47377613, "linear_total": 84934656, "linear_nnz": 23436196, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342065, "linear_attention_total": 2359296, "linear_attention_nnz": 233808, "linear_dense_total": 4718592, "linear_dense_nnz": 2108257}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2536721, "linear_attention_total": 2359296, "linear_attention_nnz": 370912, "linear_dense_total": 4718592, "linear_dense_nnz": 2165809}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2607352, "linear_attention_total": 2359296, "linear_attention_nnz": 368864, "linear_dense_total": 4718592, "linear_dense_nnz": 2238488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2740822, "linear_attention_total": 2359296, "linear_attention_nnz": 528528, "linear_dense_total": 4718592, "linear_dense_nnz": 2212294}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2662766, "linear_attention_total": 2359296, "linear_attention_nnz": 515168, "linear_dense_total": 4718592, "linear_dense_nnz": 2147598}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2582248, "linear_attention_total": 2359296, "linear_attention_nnz": 456576, "linear_dense_total": 4718592, "linear_dense_nnz": 2125672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2384302, "linear_attention_total": 2359296, "linear_attention_nnz": 426512, "linear_dense_total": 4718592, "linear_dense_nnz": 1957790}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045939, "linear_attention_total": 2359296, "linear_attention_nnz": 424416, "linear_dense_total": 4718592, "linear_dense_nnz": 1621523}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531552, "linear_attention_total": 2359296, "linear_attention_nnz": 311248, "linear_dense_total": 4718592, "linear_dense_nnz": 1220304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 936640, "linear_attention_total": 2359296, "linear_attention_nnz": 249120, "linear_dense_total": 4718592, "linear_dense_nnz": 687520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 686193, "linear_attention_total": 2359296, "linear_attention_nnz": 189856, "linear_dense_total": 4718592, "linear_dense_nnz": 496337}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379596, "linear_attention_total": 2359296, "linear_attention_nnz": 106192, "linear_dense_total": 4718592, "linear_dense_nnz": 273404}}, "total_sparsity": 56.49166422589565, "linear_sparsity": 72.40679234634212}, "speed": {"eval_elapsed_time": 32.3695623409003}, "opt_eval_metrics": {"exact_match": 80.54872280037843, "f1": 88.09731480353894}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 17.91969774197787}, "opt_eval_metrics": {"exact_match": 79.38505203405866, "f1": 87.07610213911921}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.6927186998073}, "opt_eval_metrics": {"exact_match": 78.72280037842951, "f1": 86.62745564109652}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 16.150802633957937}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.85787750084084}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.863338297931477}, "opt_eval_metrics": {"exact_match": 78.78902554399244, "f1": 86.64151988736798}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.57372920983471}, "opt_eval_metrics": {"exact_match": 79.39451277199622, "f1": 86.84346997900737}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 15.987980023957789}, "opt_eval_metrics": {"exact_match": 78.93093661305582, "f1": 86.77338922518314}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.812317132018507}, "opt_eval_metrics": {"exact_match": 78.88363292336803, "f1": 86.63235572290178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.71152348187752}, "opt_eval_metrics": {"exact_match": 78.96877956480606, "f1": 86.71968503618079}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.398823922965676}, "opt_eval_metrics": {"exact_match": 78.7038789025544, "f1": 86.58426699451658}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.785655258921906}, "opt_eval_metrics": {"exact_match": 79.99053926206244, "f1": 87.56439208763325}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 27752545, "linear_total": 84934656, "linear_nnz": 3794015, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 465383, "linear_attention_total": 2359296, "linear_attention_nnz": 18728, "linear_dense_total": 4718592, "linear_dense_nnz": 446655}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527397, "linear_attention_total": 2359296, "linear_attention_nnz": 63059, "linear_dense_total": 4718592, "linear_dense_nnz": 464338}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516492, "linear_attention_total": 2359296, "linear_attention_nnz": 53761, "linear_dense_total": 4718592, "linear_dense_nnz": 462731}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 514972, "linear_attention_total": 2359296, "linear_attention_nnz": 84624, "linear_dense_total": 4718592, "linear_dense_nnz": 430348}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 443214, "linear_attention_total": 2359296, "linear_attention_nnz": 58345, "linear_dense_total": 4718592, "linear_dense_nnz": 384869}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 396921, "linear_attention_total": 2359296, "linear_attention_nnz": 50615, "linear_dense_total": 4718592, "linear_dense_nnz": 346306}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 319004, "linear_attention_total": 2359296, "linear_attention_nnz": 41344, "linear_dense_total": 4718592, "linear_dense_nnz": 277660}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 249183, "linear_attention_total": 2359296, "linear_attention_nnz": 47420, "linear_dense_total": 4718592, "linear_dense_nnz": 201763}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 161062, "linear_attention_total": 2359296, "linear_attention_nnz": 27562, "linear_dense_total": 4718592, "linear_dense_nnz": 133500}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 81705, "linear_attention_total": 2359296, "linear_attention_nnz": 34151, "linear_dense_total": 4718592, "linear_dense_nnz": 47554}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64643, "linear_attention_total": 2359296, "linear_attention_nnz": 21311, "linear_dense_total": 4718592, "linear_dense_nnz": 43332}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 54039, "linear_attention_total": 2359296, "linear_attention_nnz": 17233, "linear_dense_total": 4718592, "linear_dense_nnz": 36806}}, "total_sparsity": 74.51397463933142, "linear_sparsity": 95.5330189363456}, "speed": {"eval_elapsed_time": 28.275199214927852}, "opt_eval_metrics": {"exact_match": 77.39829706717124, "f1": 85.66626983371626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-45000": {"stats": {"total": 108893186, "nnz": 52655769, "linear_total": 84934656, "linear_nnz": 28740096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3704832, "linear_attention_total": 2359296, "linear_attention_nnz": 1572864, "linear_dense_total": 4718592, "linear_dense_nnz": 2131968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818560, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2327040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3674112, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2494464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3592704, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2314752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2942976, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844672, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2156544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2363904, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738752, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1665024, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 878592}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 402432}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1440768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 949248}}, "total_sparsity": 51.64456938563631, "linear_sparsity": 66.162109375}, "speed": {"eval_elapsed_time": 15.498567047994584}, "opt_eval_metrics": {"exact_match": 78.1929990539262, "f1": 85.92206431273945}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 47478801, "linear_total": 84934656, "linear_nnz": 23566848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2959872, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1485312}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2121216, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1728000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3085824, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1906176}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1806336}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2422272, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1734144}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1712640}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1288704}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494528, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 708096}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 947712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 772608}}, "total_sparsity": 56.39874013788153, "linear_sparsity": 72.2529658564815}, "speed": {"eval_elapsed_time": 14.233191919047385}, "opt_eval_metrics": {"exact_match": 77.69157994323557, "f1": 85.75507572992562}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41730197, "linear_total": 84934656, "linear_nnz": 17822208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2202624, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2397696, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1218048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2302464, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1122816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1794048, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1460736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1155072, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1290240, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 503808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 423936, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 227328}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 806400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 904704, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 511488}}, "total_sparsity": 61.67786201057612, "linear_sparsity": 79.0165653935185}, "speed": {"eval_elapsed_time": 12.337535696104169}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.6071153919288}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 33.620146826142445}, "opt_eval_metrics": {"exact_match": 79.89593188268685, "f1": 87.64967103979136}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 33.69571442203596}, "opt_eval_metrics": {"exact_match": 79.8391674550615, "f1": 87.59923644792065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 29470276, "linear_total": 84934656, "linear_nnz": 5511746, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655184, "linear_attention_total": 2359296, "linear_attention_nnz": 30729, "linear_dense_total": 4718592, "linear_dense_nnz": 624455}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 733131, "linear_attention_total": 2359296, "linear_attention_nnz": 77742, "linear_dense_total": 4718592, "linear_dense_nnz": 655389}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730379, "linear_attention_total": 2359296, "linear_attention_nnz": 70206, "linear_dense_total": 4718592, "linear_dense_nnz": 660173}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734451, "linear_attention_total": 2359296, "linear_attention_nnz": 106339, "linear_dense_total": 4718592, "linear_dense_nnz": 628112}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655863, "linear_attention_total": 2359296, "linear_attention_nnz": 81845, "linear_dense_total": 4718592, "linear_dense_nnz": 574018}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 606306, "linear_attention_total": 2359296, "linear_attention_nnz": 68554, "linear_dense_total": 4718592, "linear_dense_nnz": 537752}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 492846, "linear_attention_total": 2359296, "linear_attention_nnz": 58217, "linear_dense_total": 4718592, "linear_dense_nnz": 434629}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379389, "linear_attention_total": 2359296, "linear_attention_nnz": 65705, "linear_dense_total": 4718592, "linear_dense_nnz": 313684}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 243207, "linear_attention_total": 2359296, "linear_attention_nnz": 39483, "linear_dense_total": 4718592, "linear_dense_nnz": 203724}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 119606, "linear_attention_total": 2359296, "linear_attention_nnz": 46007, "linear_dense_total": 4718592, "linear_dense_nnz": 73599}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 89281, "linear_attention_total": 2359296, "linear_attention_nnz": 27892, "linear_dense_total": 4718592, "linear_dense_nnz": 61389}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 72103, "linear_attention_total": 2359296, "linear_attention_nnz": 20781, "linear_dense_total": 4718592, "linear_dense_nnz": 51322}}, "total_sparsity": 72.93652882926945, "linear_sparsity": 93.51060419906804}, "speed": {"eval_elapsed_time": 30.31329287402332}, "opt_eval_metrics": {"exact_match": 78.4484389782403, "f1": 86.3547925481507}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 72624802, "linear_total": 84934656, "linear_nnz": 48687104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4657152, "linear_attention_total": 2359296, "linear_attention_nnz": 621568, "linear_dense_total": 4718592, "linear_dense_nnz": 4035584}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4759552, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 4155392}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5065728, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 4284416}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5408768, "linear_attention_total": 2359296, "linear_attention_nnz": 1068032, "linear_dense_total": 4718592, "linear_dense_nnz": 4340736}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5324800, "linear_attention_total": 2359296, "linear_attention_nnz": 1087488, "linear_dense_total": 4718592, "linear_dense_nnz": 4237312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5189632, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 4281344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5169152, "linear_attention_total": 2359296, "linear_attention_nnz": 1019904, "linear_dense_total": 4718592, "linear_dense_nnz": 4149248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4749312, "linear_attention_total": 2359296, "linear_attention_nnz": 921600, "linear_dense_total": 4718592, "linear_dense_nnz": 3827712}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3932160, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 3080192}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1808384, "linear_attention_total": 2359296, "linear_attention_nnz": 529408, "linear_dense_total": 4718592, "linear_dense_nnz": 1278976}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1443840, "linear_attention_total": 2359296, "linear_attention_nnz": 486400, "linear_dense_total": 4718592, "linear_dense_nnz": 957440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178624, "linear_attention_total": 2359296, "linear_attention_nnz": 286720, "linear_dense_total": 4718592, "linear_dense_nnz": 891904}}, "total_sparsity": 33.306385213120684, "linear_sparsity": 42.67698688271605}, "speed": {"eval_elapsed_time": 27.85703947697766}, "opt_eval_metrics": {"exact_match": 80.72847682119205, "f1": 88.08831525592305}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 72671586, "linear_total": 84934656, "linear_nnz": 48734208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4676608, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 4032512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4756480, "linear_attention_total": 2359296, "linear_attention_nnz": 583680, "linear_dense_total": 4718592, "linear_dense_nnz": 4172800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5113856, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 4324352}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5421056, "linear_attention_total": 2359296, "linear_attention_nnz": 1028096, "linear_dense_total": 4718592, "linear_dense_nnz": 4392960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5426176, "linear_attention_total": 2359296, "linear_attention_nnz": 1067008, "linear_dense_total": 4718592, "linear_dense_nnz": 4359168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5276672, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 4333568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5165056, "linear_attention_total": 2359296, "linear_attention_nnz": 1003520, "linear_dense_total": 4718592, "linear_dense_nnz": 4161536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4797440, "linear_attention_total": 2359296, "linear_attention_nnz": 908288, "linear_dense_total": 4718592, "linear_dense_nnz": 3889152}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3890176, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 3021824}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1726464, "linear_attention_total": 2359296, "linear_attention_nnz": 520192, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1336320, "linear_attention_total": 2359296, "linear_attention_nnz": 445440, "linear_dense_total": 4718592, "linear_dense_nnz": 890880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1147904, "linear_attention_total": 2359296, "linear_attention_nnz": 272384, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}}, "total_sparsity": 33.26342201062975, "linear_sparsity": 42.62152777777778}, "speed": {"eval_elapsed_time": 27.81183459307067}, "opt_eval_metrics": {"exact_match": 80.81362346263009, "f1": 88.10463591853348}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 72950082, "linear_total": 84934656, "linear_nnz": 49012736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4754432, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 4140032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4843520, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 4246528}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5145600, "linear_attention_total": 2359296, "linear_attention_nnz": 788480, "linear_dense_total": 4718592, "linear_dense_nnz": 4357120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5488640, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 4426752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5463040, "linear_attention_total": 2359296, "linear_attention_nnz": 1048576, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5317632, "linear_attention_total": 2359296, "linear_attention_nnz": 918528, "linear_dense_total": 4718592, "linear_dense_nnz": 4399104}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5230592, "linear_attention_total": 2359296, "linear_attention_nnz": 998400, "linear_dense_total": 4718592, "linear_dense_nnz": 4232192}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4838400, "linear_attention_total": 2359296, "linear_attention_nnz": 899072, "linear_dense_total": 4718592, "linear_dense_nnz": 3939328}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3848192, "linear_attention_total": 2359296, "linear_attention_nnz": 819200, "linear_dense_total": 4718592, "linear_dense_nnz": 3028992}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1620992, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1104896}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1332224, "linear_attention_total": 2359296, "linear_attention_nnz": 450560, "linear_dense_total": 4718592, "linear_dense_nnz": 881664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1129472, "linear_attention_total": 2359296, "linear_attention_nnz": 266240, "linear_dense_total": 4718592, "linear_dense_nnz": 863232}}, "total_sparsity": 33.00767047076757, "linear_sparsity": 42.29359567901234}, "speed": {"eval_elapsed_time": 27.788447924889624}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 87.95145431777735}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39356610, "linear_total": 84934656, "linear_nnz": 15444992, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1024000, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 512000}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1236992, "linear_attention_total": 2359296, "linear_attention_nnz": 551936, "linear_dense_total": 4718592, "linear_dense_nnz": 685056}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1934336, "linear_attention_total": 2359296, "linear_attention_nnz": 722944, "linear_dense_total": 4718592, "linear_dense_nnz": 1211392}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2352128, "linear_attention_total": 2359296, "linear_attention_nnz": 954368, "linear_dense_total": 4718592, "linear_dense_nnz": 1397760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028544, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1238016}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1880064, "linear_attention_total": 2359296, "linear_attention_nnz": 584704, "linear_dense_total": 4718592, "linear_dense_nnz": 1295360}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 608256, "linear_dense_total": 4718592, "linear_dense_nnz": 1018880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1316864, "linear_attention_total": 2359296, "linear_attention_nnz": 740352, "linear_dense_total": 4718592, "linear_dense_nnz": 576512}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 673792, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 451584, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 94208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 304128, "linear_dense_total": 4718592, "linear_dense_nnz": 197632}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417792, "linear_attention_total": 2359296, "linear_attention_nnz": 197632, "linear_dense_total": 4718592, "linear_dense_nnz": 220160}}, "total_sparsity": 63.85760078688487, "linear_sparsity": 81.81544174382715}, "speed": {"eval_elapsed_time": 17.21582882408984}, "opt_eval_metrics": {"exact_match": 76.79280983916746, "f1": 85.3167029862563}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39183362, "linear_total": 84934656, "linear_nnz": 15271936, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1032192, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 519168}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1215488, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 692224}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1922048, "linear_attention_total": 2359296, "linear_attention_nnz": 683008, "linear_dense_total": 4718592, "linear_dense_nnz": 1239040}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2319360, "linear_attention_total": 2359296, "linear_attention_nnz": 945152, "linear_dense_total": 4718592, "linear_dense_nnz": 1374208}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045952, "linear_attention_total": 2359296, "linear_attention_nnz": 809984, "linear_dense_total": 4718592, "linear_dense_nnz": 1235968}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1847296, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1607680, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1287168, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 578560}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631808, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 158720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 442368, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 90112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 206848}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 401408, "linear_attention_total": 2359296, "linear_attention_nnz": 186368, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 64.01669981444019, "linear_sparsity": 82.0191936728395}, "speed": {"eval_elapsed_time": 17.197634894168004}, "opt_eval_metrics": {"exact_match": 77.04824976348155, "f1": 85.17930403802184}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-50000": {"stats": {"total": 108893186, "nnz": 40770050, "linear_total": 84934656, "linear_nnz": 16811520, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2190336, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 855552}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2171904, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 992256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1900032, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1717248, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 1029120}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1319424, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 729600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1030656, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 637440}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1179648, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 393216}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 715776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 224256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}}, "total_sparsity": 62.55959486757969, "linear_sparsity": 80.20652488425925}, "speed": {"eval_elapsed_time": 12.385207333834842}, "opt_eval_metrics": {"exact_match": 75.79943235572375, "f1": 84.3797785815339}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 37334018, "linear_total": 84934656, "linear_nnz": 13375488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663488, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1282560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451520, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 566784}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1385472, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1265664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 675840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1069056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.7150099364344, "linear_sparsity": 84.25202546296296}, "speed": {"eval_elapsed_time": 11.265181887894869}, "opt_eval_metrics": {"exact_match": 76.54683065279092, "f1": 84.56290825102765}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37189634, "linear_total": 84934656, "linear_nnz": 13231104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1233408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1354752, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 568320}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1387008, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 678912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 809472, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1065984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 279552}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 125952}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 645120, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 65.84760225492897, "linear_sparsity": 84.42201967592592}, "speed": {"eval_elapsed_time": 11.085542490938678}, "opt_eval_metrics": {"exact_match": 75.99810785241249, "f1": 84.26442986520863}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.512992850970477}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 86.06827252573265}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 47136529, "linear_total": 84934656, "linear_nnz": 23220736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1984512, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 1339392}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2164224, "linear_attention_total": 2359296, "linear_attention_nnz": 592896, "linear_dense_total": 4718592, "linear_dense_nnz": 1571328}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2625536, "linear_attention_total": 2359296, "linear_attention_nnz": 880640, "linear_dense_total": 4718592, "linear_dense_nnz": 1744896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2992640, "linear_attention_total": 2359296, "linear_attention_nnz": 1230848, "linear_dense_total": 4718592, "linear_dense_nnz": 1761792}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2940928, "linear_attention_total": 2359296, "linear_attention_nnz": 1214464, "linear_dense_total": 4718592, "linear_dense_nnz": 1726464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2535936, "linear_attention_total": 2359296, "linear_attention_nnz": 906240, "linear_dense_total": 4718592, "linear_dense_nnz": 1629696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2213376, "linear_attention_total": 2359296, "linear_attention_nnz": 943104, "linear_dense_total": 4718592, "linear_dense_nnz": 1270272}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1923584, "linear_attention_total": 2359296, "linear_attention_nnz": 935936, "linear_dense_total": 4718592, "linear_dense_nnz": 987648}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1419264, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 546816}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 883712, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667648, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 869376, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 574464}}, "total_sparsity": 56.713059162397904, "linear_sparsity": 72.66046971450618}, "speed": {"eval_elapsed_time": 18.717300809919834}, "opt_eval_metrics": {"exact_match": 80.27436140018922, "f1": 87.70461789964966}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46572775, "linear_total": 84934656, "linear_nnz": 22657536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1908736, "linear_attention_total": 2359296, "linear_attention_nnz": 627712, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2145280, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 1548288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2499072, "linear_attention_total": 2359296, "linear_attention_nnz": 789504, "linear_dense_total": 4718592, "linear_dense_nnz": 1709568}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2920960, "linear_attention_total": 2359296, "linear_attention_nnz": 1180672, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1204224, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2516992, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1600512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2151936, "linear_attention_total": 2359296, "linear_attention_nnz": 909312, "linear_dense_total": 4718592, "linear_dense_nnz": 1242624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1889792, "linear_attention_total": 2359296, "linear_attention_nnz": 917504, "linear_dense_total": 4718592, "linear_dense_nnz": 972288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1398272, "linear_attention_total": 2359296, "linear_attention_nnz": 856064, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858624, "linear_attention_total": 2359296, "linear_attention_nnz": 611328, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 634368, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 827392, "linear_attention_total": 2359296, "linear_attention_nnz": 268288, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}}, "total_sparsity": 57.23077199706509, "linear_sparsity": 73.32356770833333}, "speed": {"eval_elapsed_time": 18.605645736912265}, "opt_eval_metrics": {"exact_match": 79.80132450331126, "f1": 87.48291010744668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-25000": {"stats": {"total": 108893186, "nnz": 97257474, "linear_total": 84934656, "linear_nnz": 73298944, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6452736, "linear_attention_total": 2359296, "linear_attention_nnz": 1889280, "linear_dense_total": 4718592, "linear_dense_nnz": 4563456}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6338048, "linear_attention_total": 2359296, "linear_attention_nnz": 1839104, "linear_dense_total": 4718592, "linear_dense_nnz": 4498944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6545920, "linear_attention_total": 2359296, "linear_attention_nnz": 2037760, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6564864, "linear_attention_total": 2359296, "linear_attention_nnz": 2056704, "linear_dense_total": 4718592, "linear_dense_nnz": 4508160}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6459904, "linear_attention_total": 2359296, "linear_attention_nnz": 2000896, "linear_dense_total": 4718592, "linear_dense_nnz": 4459008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6377728, "linear_attention_total": 2359296, "linear_attention_nnz": 1963264, "linear_dense_total": 4718592, "linear_dense_nnz": 4414464}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6199808, "linear_attention_total": 2359296, "linear_attention_nnz": 1899008, "linear_dense_total": 4718592, "linear_dense_nnz": 4300800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 6070272, "linear_attention_total": 2359296, "linear_attention_nnz": 1938432, "linear_dense_total": 4718592, "linear_dense_nnz": 4131840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5774848, "linear_attention_total": 2359296, "linear_attention_nnz": 1905664, "linear_dense_total": 4718592, "linear_dense_nnz": 3869184}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5543168, "linear_attention_total": 2359296, "linear_attention_nnz": 1779968, "linear_dense_total": 4718592, "linear_dense_nnz": 3763200}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5265920, "linear_attention_total": 2359296, "linear_attention_nnz": 1662464, "linear_dense_total": 4718592, "linear_dense_nnz": 3603456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5705728, "linear_attention_total": 2359296, "linear_attention_nnz": 1722880, "linear_dense_total": 4718592, "linear_dense_nnz": 3982848}}, "total_sparsity": 10.685436276976967, "linear_sparsity": 13.699604552469136}, "speed": {"eval_elapsed_time": 39.35505584185012}, "opt_eval_metrics": {"exact_match": 70.26490066225166, "f1": 79.54946151241414}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 58295010, "linear_total": 84934656, "linear_nnz": 34364416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712576, "linear_attention_total": 2359296, "linear_attention_nnz": 934912, "linear_dense_total": 4718592, "linear_dense_nnz": 1777664}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2907136, "linear_attention_total": 2359296, "linear_attention_nnz": 738304, "linear_dense_total": 4718592, "linear_dense_nnz": 2168832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4201472, "linear_attention_total": 2359296, "linear_attention_nnz": 1162240, "linear_dense_total": 4718592, "linear_dense_nnz": 3039232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4531200, "linear_attention_total": 2359296, "linear_attention_nnz": 1366016, "linear_dense_total": 4718592, "linear_dense_nnz": 3165184}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4667392, "linear_attention_total": 2359296, "linear_attention_nnz": 1484800, "linear_dense_total": 4718592, "linear_dense_nnz": 3182592}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4599808, "linear_attention_total": 2359296, "linear_attention_nnz": 1414144, "linear_dense_total": 4718592, "linear_dense_nnz": 3185664}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3828736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 2572288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2659328, "linear_attention_total": 2359296, "linear_attention_nnz": 991232, "linear_dense_total": 4718592, "linear_dense_nnz": 1668096}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1654784, "linear_attention_total": 2359296, "linear_attention_nnz": 966656, "linear_dense_total": 4718592, "linear_dense_nnz": 688128}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 927744, "linear_attention_total": 2359296, "linear_attention_nnz": 691200, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 873472, "linear_attention_total": 2359296, "linear_attention_nnz": 530432, "linear_dense_total": 4718592, "linear_dense_nnz": 343040}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 800768, "linear_attention_total": 2359296, "linear_attention_nnz": 378880, "linear_dense_total": 4718592, "linear_dense_nnz": 421888}}, "total_sparsity": 46.46587895775224, "linear_sparsity": 59.540171682098766}, "speed": {"eval_elapsed_time": 25.710868231020868}, "opt_eval_metrics": {"exact_match": 79.64995269631031, "f1": 87.30139925832849}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 56885634, "linear_total": 84934656, "linear_nnz": 32956416, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2584576, "linear_attention_total": 2359296, "linear_attention_nnz": 949248, "linear_dense_total": 4718592, "linear_dense_nnz": 1635328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2798592, "linear_attention_total": 2359296, "linear_attention_nnz": 750592, "linear_dense_total": 4718592, "linear_dense_nnz": 2048000}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4019200, "linear_attention_total": 2359296, "linear_attention_nnz": 1123328, "linear_dense_total": 4718592, "linear_dense_nnz": 2895872}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4274176, "linear_attention_total": 2359296, "linear_attention_nnz": 1306624, "linear_dense_total": 4718592, "linear_dense_nnz": 2967552}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4581376, "linear_attention_total": 2359296, "linear_attention_nnz": 1475584, "linear_dense_total": 4718592, "linear_dense_nnz": 3105792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4219904, "linear_attention_total": 2359296, "linear_attention_nnz": 1285120, "linear_dense_total": 4718592, "linear_dense_nnz": 2934784}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3736576, "linear_attention_total": 2359296, "linear_attention_nnz": 1235968, "linear_dense_total": 4718592, "linear_dense_nnz": 2500608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2587648, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1604608}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1627136, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 661504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 880640, "linear_attention_total": 2359296, "linear_attention_nnz": 650240, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 862208, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 352256}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784384, "linear_attention_total": 2359296, "linear_attention_nnz": 363520, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}}, "total_sparsity": 47.76015277944021, "linear_sparsity": 61.19791666666667}, "speed": {"eval_elapsed_time": 25.134117686888203}, "opt_eval_metrics": {"exact_match": 79.90539262062441, "f1": 87.36378709007766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 55520034, "linear_total": 84934656, "linear_nnz": 31592448, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2382848, "linear_attention_total": 2359296, "linear_attention_nnz": 889856, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2568192, "linear_attention_total": 2359296, "linear_attention_nnz": 717824, "linear_dense_total": 4718592, "linear_dense_nnz": 1850368}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3915776, "linear_attention_total": 2359296, "linear_attention_nnz": 1113088, "linear_dense_total": 4718592, "linear_dense_nnz": 2802688}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4258816, "linear_attention_total": 2359296, "linear_attention_nnz": 1297408, "linear_dense_total": 4718592, "linear_dense_nnz": 2961408}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4300800, "linear_attention_total": 2359296, "linear_attention_nnz": 1402880, "linear_dense_total": 4718592, "linear_dense_nnz": 2897920}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4030464, "linear_attention_total": 2359296, "linear_attention_nnz": 1157120, "linear_dense_total": 4718592, "linear_dense_nnz": 2873344}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3661824, "linear_attention_total": 2359296, "linear_attention_nnz": 1187840, "linear_dense_total": 4718592, "linear_dense_nnz": 2473984}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2507776, "linear_attention_total": 2359296, "linear_attention_nnz": 979968, "linear_dense_total": 4718592, "linear_dense_nnz": 1527808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1562624, "linear_attention_total": 2359296, "linear_attention_nnz": 952320, "linear_dense_total": 4718592, "linear_dense_nnz": 610304}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 865280, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 223232}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 328704}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 719872, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 388096}}, "total_sparsity": 49.0142257386059, "linear_sparsity": 62.80381944444444}, "speed": {"eval_elapsed_time": 24.50548317306675}, "opt_eval_metrics": {"exact_match": 79.82024597918638, "f1": 87.30735739624531}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 55329122, "linear_total": 84934656, "linear_nnz": 31404032, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2845696, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 2385920}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3173376, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 2798592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3866624, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 3454976}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4224000, "linear_attention_total": 2359296, "linear_attention_nnz": 727040, "linear_dense_total": 4718592, "linear_dense_nnz": 3496960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3954688, "linear_attention_total": 2359296, "linear_attention_nnz": 541696, "linear_dense_total": 4718592, "linear_dense_nnz": 3412992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3993600, "linear_attention_total": 2359296, "linear_attention_nnz": 545792, "linear_dense_total": 4718592, "linear_dense_nnz": 3447808}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3427328, "linear_attention_total": 2359296, "linear_attention_nnz": 493568, "linear_dense_total": 4718592, "linear_dense_nnz": 2933760}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2641920, "linear_attention_total": 2359296, "linear_attention_nnz": 641024, "linear_dense_total": 4718592, "linear_dense_nnz": 2000896}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1293312, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 1004544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 678912, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 339968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 671744, "linear_attention_total": 2359296, "linear_attention_nnz": 254976, "linear_dense_total": 4718592, "linear_dense_nnz": 416768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 632832, "linear_attention_total": 2359296, "linear_attention_nnz": 165888, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}}, "total_sparsity": 49.1895461668281, "linear_sparsity": 63.025655864197525}, "speed": {"eval_elapsed_time": 21.893441491993144}, "opt_eval_metrics": {"exact_match": 77.68211920529801, "f1": 86.11161494070976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 54788706, "linear_total": 84934656, "linear_nnz": 30864384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2661376, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 2226176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3087360, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 2727936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3895296, "linear_attention_total": 2359296, "linear_attention_nnz": 421888, "linear_dense_total": 4718592, "linear_dense_nnz": 3473408}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4162560, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 3451904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3985408, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 3437568}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 3325952}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340288, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 2828288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2614272, "linear_attention_total": 2359296, "linear_attention_nnz": 622592, "linear_dense_total": 4718592, "linear_dense_nnz": 1991680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1256448, "linear_attention_total": 2359296, "linear_attention_nnz": 276480, "linear_dense_total": 4718592, "linear_dense_nnz": 979968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668672, "linear_attention_total": 2359296, "linear_attention_nnz": 337920, "linear_dense_total": 4718592, "linear_dense_nnz": 330752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 664576, "linear_attention_total": 2359296, "linear_attention_nnz": 252928, "linear_dense_total": 4718592, "linear_dense_nnz": 411648}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 646144, "linear_attention_total": 2359296, "linear_attention_nnz": 158720, "linear_dense_total": 4718592, "linear_dense_nnz": 487424}}, "total_sparsity": 49.68582699012958, "linear_sparsity": 63.66102430555556}, "speed": {"eval_elapsed_time": 21.611296633956954}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 86.01491496793933}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53430466, "linear_total": 84934656, "linear_nnz": 29507584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2533376, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 2119680}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2840576, "linear_attention_total": 2359296, "linear_attention_nnz": 364544, "linear_dense_total": 4718592, "linear_dense_nnz": 2476032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3843072, "linear_attention_total": 2359296, "linear_attention_nnz": 397312, "linear_dense_total": 4718592, "linear_dense_nnz": 3445760}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4069376, "linear_attention_total": 2359296, "linear_attention_nnz": 666624, "linear_dense_total": 4718592, "linear_dense_nnz": 3402752}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3831808, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 3339264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3714048, "linear_attention_total": 2359296, "linear_attention_nnz": 519168, "linear_dense_total": 4718592, "linear_dense_nnz": 3194880}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3200000, "linear_attention_total": 2359296, "linear_attention_nnz": 448512, "linear_dense_total": 4718592, "linear_dense_nnz": 2751488}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415616, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1839104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211392, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619520, "linear_attention_total": 2359296, "linear_attention_nnz": 317440, "linear_dense_total": 4718592, "linear_dense_nnz": 302080}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 623616, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 386048}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 50.93314103235074, "linear_sparsity": 65.25848765432099}, "speed": {"eval_elapsed_time": 21.071897589135915}, "opt_eval_metrics": {"exact_match": 77.70104068117313, "f1": 85.88451743537976}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 44512539, "linear_total": 84934656, "linear_nnz": 20599296, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869312, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 1254912}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2022400, "linear_attention_total": 2359296, "linear_attention_nnz": 612352, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2259968, "linear_attention_total": 2359296, "linear_attention_nnz": 699392, "linear_dense_total": 4718592, "linear_dense_nnz": 1560576}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 1010688, "linear_dense_total": 4718592, "linear_dense_nnz": 1532928}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2463744, "linear_attention_total": 2359296, "linear_attention_nnz": 927744, "linear_dense_total": 4718592, "linear_dense_nnz": 1536000}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2345472, "linear_attention_total": 2359296, "linear_attention_nnz": 872448, "linear_dense_total": 4718592, "linear_dense_nnz": 1473024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2036224, "linear_attention_total": 2359296, "linear_attention_nnz": 867328, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 897024, "linear_dense_total": 4718592, "linear_dense_nnz": 903168}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165824, "linear_attention_total": 2359296, "linear_attention_nnz": 642048, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 734208, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 576512, "linear_attention_total": 2359296, "linear_attention_nnz": 386048, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 781824, "linear_attention_total": 2359296, "linear_attention_nnz": 261120, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}}, "total_sparsity": 59.12275080279128, "linear_sparsity": 75.7468894675926}, "speed": {"eval_elapsed_time": 17.206276496173814}, "opt_eval_metrics": {"exact_match": 79.36613055818354, "f1": 87.31339978481493}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 40733175, "linear_total": 84934656, "linear_nnz": 16822784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1394688, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 847872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 539648, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1878528, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1221120}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 1211904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2143744, "linear_attention_total": 2359296, "linear_attention_nnz": 864256, "linear_dense_total": 4718592, "linear_dense_nnz": 1279488}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1902592, "linear_attention_total": 2359296, "linear_attention_nnz": 686080, "linear_dense_total": 4718592, "linear_dense_nnz": 1216512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601536, "linear_attention_total": 2359296, "linear_attention_nnz": 649216, "linear_dense_total": 4718592, "linear_dense_nnz": 952320}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507328, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 715776}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908800, "linear_attention_total": 2359296, "linear_attention_nnz": 474112, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 435200, "linear_dense_total": 4718592, "linear_dense_nnz": 172032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 354304, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591872, "linear_attention_total": 2359296, "linear_attention_nnz": 226304, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}}, "total_sparsity": 62.593458327135366, "linear_sparsity": 80.19326292438271}, "speed": {"eval_elapsed_time": 16.06849605194293}, "opt_eval_metrics": {"exact_match": 78.68495742667928, "f1": 86.66781681977909}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40239113, "linear_total": 84934656, "linear_nnz": 16329216, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1344512, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 516096, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842176, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1204224}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2097664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2056192, "linear_attention_total": 2359296, "linear_attention_nnz": 790528, "linear_dense_total": 4718592, "linear_dense_nnz": 1265664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 664576, "linear_dense_total": 4718592, "linear_dense_nnz": 1201152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1565184, "linear_attention_total": 2359296, "linear_attention_nnz": 629760, "linear_dense_total": 4718592, "linear_dense_nnz": 935424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1486336, "linear_attention_total": 2359296, "linear_attention_nnz": 787456, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 844288, "linear_attention_total": 2359296, "linear_attention_nnz": 415744, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 423936, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 472064, "linear_attention_total": 2359296, "linear_attention_nnz": 324608, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555520, "linear_attention_total": 2359296, "linear_attention_nnz": 209920, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}}, "total_sparsity": 63.04717083032174, "linear_sparsity": 80.7743778935185}, "speed": {"eval_elapsed_time": 15.895570316817611}, "opt_eval_metrics": {"exact_match": 78.80794701986756, "f1": 86.74156854566804}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.631975691067055}, "opt_eval_metrics": {"exact_match": 79.34720908230842, "f1": 87.05154033961743}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 18.20940860803239}, "opt_eval_metrics": {"exact_match": 78.73226111636707, "f1": 86.74884583609185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 18.07671318203211}, "opt_eval_metrics": {"exact_match": 78.97824030274361, "f1": 86.77789246016766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.641912897117436}, "opt_eval_metrics": {"exact_match": 78.74172185430463, "f1": 86.69521763053608}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.640617809956893}, "opt_eval_metrics": {"exact_match": 78.84578997161779, "f1": 86.78133258210022}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 25.115268317982554}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.30981160352648}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39702836, "linear_total": 84934656, "linear_nnz": 15791104, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125888, "linear_attention_total": 2359296, "linear_attention_nnz": 838656, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1188352, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1694720, "linear_attention_total": 2359296, "linear_attention_nnz": 1089536, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1962496, "linear_attention_total": 2359296, "linear_attention_nnz": 1291264, "linear_dense_total": 4718592, "linear_dense_nnz": 671232}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2112512, "linear_attention_total": 2359296, "linear_attention_nnz": 1384448, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1783296, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1632768, "linear_attention_total": 2359296, "linear_attention_nnz": 1127424, "linear_dense_total": 4718592, "linear_dense_nnz": 505344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1333760, "linear_attention_total": 2359296, "linear_attention_nnz": 942080, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1204736, "linear_attention_total": 2359296, "linear_attention_nnz": 982016, "linear_dense_total": 4718592, "linear_dense_nnz": 222720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 729600, "linear_attention_total": 2359296, "linear_attention_nnz": 645120, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 573952, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 84480}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 449024, "linear_attention_total": 2359296, "linear_attention_nnz": 293888, "linear_dense_total": 4718592, "linear_dense_nnz": 155136}}, "total_sparsity": 63.53965068117302, "linear_sparsity": 81.40793788580247}, "speed": {"eval_elapsed_time": 17.754389239940792}, "opt_eval_metrics": {"exact_match": 79.20529801324503, "f1": 87.11181141207972}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.066904521081597}, "opt_eval_metrics": {"exact_match": 74.01135288552507, "f1": 83.09229054053787}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 11.027097067097202}, "opt_eval_metrics": {"exact_match": 73.53831598864711, "f1": 82.75790566079003}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 11.011191483819857}, "opt_eval_metrics": {"exact_match": 73.03689687795648, "f1": 82.25143344147253}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41908226, "linear_total": 84934656, "linear_nnz": 17949696, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081280, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 606720}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529856, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 841728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2437632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2115072, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1927680, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1448448, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 668160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665088, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 173568}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 595968, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.51437244200017, "linear_sparsity": 78.86646412037037}, "speed": {"eval_elapsed_time": 12.991677745943889}, "opt_eval_metrics": {"exact_match": 78.05108798486282, "f1": 85.81174728555466}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41954306, "linear_total": 84934656, "linear_nnz": 17995776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2079744, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1626624, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 840192}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434560, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 1058304}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2116608, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829376, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 1092096}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886208, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1099776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497600, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1210368, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1178112, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 391680}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 764928, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 546816, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 202752}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 824832, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.4720557446083, "linear_sparsity": 78.81221064814815}, "speed": {"eval_elapsed_time": 12.87139375694096}, "opt_eval_metrics": {"exact_match": 77.62535477767265, "f1": 85.49958980627748}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 38322466, "linear_total": 84934656, "linear_nnz": 14411776, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015808, "linear_attention_total": 2359296, "linear_attention_nnz": 498688, "linear_dense_total": 4718592, "linear_dense_nnz": 517120}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1176576, "linear_attention_total": 2359296, "linear_attention_nnz": 510976, "linear_dense_total": 4718592, "linear_dense_nnz": 665600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846272, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1137664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953792, "linear_attention_total": 2359296, "linear_attention_nnz": 832512, "linear_dense_total": 4718592, "linear_dense_nnz": 1121280}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854464, "linear_attention_total": 2359296, "linear_attention_nnz": 739328, "linear_dense_total": 4718592, "linear_dense_nnz": 1115136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754112, "linear_attention_total": 2359296, "linear_attention_nnz": 576512, "linear_dense_total": 4718592, "linear_dense_nnz": 1177600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522688, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 920576}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1283072, "linear_attention_total": 2359296, "linear_attention_nnz": 728064, "linear_dense_total": 4718592, "linear_dense_nnz": 555008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 641024, "linear_attention_total": 2359296, "linear_attention_nnz": 465920, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 357376, "linear_dense_total": 4718592, "linear_dense_nnz": 98304}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496640, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 185344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411648, "linear_attention_total": 2359296, "linear_attention_nnz": 199680, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}}, "total_sparsity": 64.80728739078312, "linear_sparsity": 83.03192515432099}, "speed": {"eval_elapsed_time": 16.777261161943898}, "opt_eval_metrics": {"exact_match": 75.54399243140965, "f1": 84.18974712714544}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 37814786, "linear_total": 84934656, "linear_nnz": 13904896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 974848, "linear_attention_total": 2359296, "linear_attention_nnz": 489472, "linear_dense_total": 4718592, "linear_dense_nnz": 485376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1168384, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1829888, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1119232}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1906688, "linear_attention_total": 2359296, "linear_attention_nnz": 851968, "linear_dense_total": 4718592, "linear_dense_nnz": 1054720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1767424, "linear_attention_total": 2359296, "linear_attention_nnz": 708608, "linear_dense_total": 4718592, "linear_dense_nnz": 1058816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684480, "linear_attention_total": 2359296, "linear_attention_nnz": 556032, "linear_dense_total": 4718592, "linear_dense_nnz": 1128448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1434624, "linear_attention_total": 2359296, "linear_attention_nnz": 569344, "linear_dense_total": 4718592, "linear_dense_nnz": 865280}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205248, "linear_attention_total": 2359296, "linear_attention_nnz": 692224, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 635904, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 347136, "linear_dense_total": 4718592, "linear_dense_nnz": 82944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 312320, "linear_dense_total": 4718592, "linear_dense_nnz": 189440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 174080, "linear_dense_total": 4718592, "linear_dense_nnz": 191488}}, "total_sparsity": 65.2735057269791, "linear_sparsity": 83.62871334876543}, "speed": {"eval_elapsed_time": 16.61875824886374}, "opt_eval_metrics": {"exact_match": 75.44938505203406, "f1": 84.0707510238674}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36247394, "linear_total": 84934656, "linear_nnz": 12339200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 492544, "linear_dense_total": 4718592, "linear_dense_nnz": 404480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1039360, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 550912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1658880, "linear_attention_total": 2359296, "linear_attention_nnz": 657408, "linear_dense_total": 4718592, "linear_dense_nnz": 1001472}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1664000, "linear_attention_total": 2359296, "linear_attention_nnz": 775168, "linear_dense_total": 4718592, "linear_dense_nnz": 888832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1506304, "linear_attention_total": 2359296, "linear_attention_nnz": 564224, "linear_dense_total": 4718592, "linear_dense_nnz": 942080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1456128, "linear_attention_total": 2359296, "linear_attention_nnz": 494592, "linear_dense_total": 4718592, "linear_dense_nnz": 961536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1302528, "linear_attention_total": 2359296, "linear_attention_nnz": 512000, "linear_dense_total": 4718592, "linear_dense_nnz": 790528}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 419840}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 414720, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393216, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 79872}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 344064, "linear_attention_total": 2359296, "linear_attention_nnz": 153600, "linear_dense_total": 4718592, "linear_dense_nnz": 190464}}, "total_sparsity": 66.71289055680674, "linear_sparsity": 85.47212577160494}, "speed": {"eval_elapsed_time": 16.059992676833645}, "opt_eval_metrics": {"exact_match": 75.57237464522233, "f1": 84.02544962299854}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 36346370, "linear_total": 84934656, "linear_nnz": 12387840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1721856, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 950784, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1857024, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1113600, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1015296, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 425472}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 731136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 999936, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 213504}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 296448, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 99840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 614400, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 122880}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 175104}}, "total_sparsity": 66.62199781720042, "linear_sparsity": 85.4148582175926}, "speed": {"eval_elapsed_time": 11.044030340854079}, "opt_eval_metrics": {"exact_match": 76.75496688741723, "f1": 84.83470649534952}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 35750402, "linear_total": 84934656, "linear_nnz": 11791872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893952, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1247232, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 737280, "linear_dense_total": 4718592, "linear_dense_nnz": 520704}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1070592, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 480768}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 978432, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 388608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 605184, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 365568, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}}, "total_sparsity": 67.16929377013544, "linear_sparsity": 86.11653645833334}, "speed": {"eval_elapsed_time": 10.875461397925392}, "opt_eval_metrics": {"exact_match": 76.3197729422895, "f1": 84.62201750681498}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 35662850, "linear_total": 84934656, "linear_nnz": 11704320, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1588224, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 897024, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 258048}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804800, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 428544}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1184256, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 496128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1064448, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 474624}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 288768, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 364032, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}}, "total_sparsity": 67.24969549518002, "linear_sparsity": 86.21961805555556}, "speed": {"eval_elapsed_time": 10.863983491901308}, "opt_eval_metrics": {"exact_match": 76.5279091769158, "f1": 84.6776690586996}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35638274, "linear_total": 84934656, "linear_nnz": 11679744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1586688, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 210432}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887808, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 248832}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1803264, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 427008}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1244160, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 457728}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1175040, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 486912}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1062912, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 976896, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705024, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 984576, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 290304, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 93696}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 602112, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360960, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 164352}}, "total_sparsity": 67.27226440045568, "linear_sparsity": 86.24855324074075}, "speed": {"eval_elapsed_time": 10.856388033833355}, "opt_eval_metrics": {"exact_match": 76.31031220435194, "f1": 84.63605545666391}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 36176130, "linear_total": 84934656, "linear_nnz": 12217600, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 911360, "linear_attention_total": 2359296, "linear_attention_nnz": 352256, "linear_dense_total": 4718592, "linear_dense_nnz": 559104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1284864, "linear_attention_total": 2359296, "linear_attention_nnz": 478464, "linear_dense_total": 4718592, "linear_dense_nnz": 806400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1386240, "linear_attention_total": 2359296, "linear_attention_nnz": 461568, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1616128, "linear_attention_total": 2359296, "linear_attention_nnz": 763648, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1410048, "linear_attention_total": 2359296, "linear_attention_nnz": 451584, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1415424, "linear_attention_total": 2359296, "linear_attention_nnz": 509184, "linear_dense_total": 4718592, "linear_dense_nnz": 906240}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1157632, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 698880}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1138688, "linear_attention_total": 2359296, "linear_attention_nnz": 550400, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 661504, "linear_attention_total": 2359296, "linear_attention_nnz": 311296, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 448512, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 379904, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407296, "linear_attention_total": 2359296, "linear_attention_nnz": 160000, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}}, "total_sparsity": 66.7783345047871, "linear_sparsity": 85.6152946566358}, "speed": {"eval_elapsed_time": 15.005711378064007}, "opt_eval_metrics": {"exact_match": 76.39545884578997, "f1": 84.89462085525314}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33844482, "linear_total": 84934656, "linear_nnz": 9885952, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701184, "linear_attention_total": 2359296, "linear_attention_nnz": 295680, "linear_dense_total": 4718592, "linear_dense_nnz": 405504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042688, "linear_attention_total": 2359296, "linear_attention_nnz": 380672, "linear_dense_total": 4718592, "linear_dense_nnz": 662016}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1087744, "linear_attention_total": 2359296, "linear_attention_nnz": 328960, "linear_dense_total": 4718592, "linear_dense_nnz": 758784}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1340672, "linear_attention_total": 2359296, "linear_attention_nnz": 612608, "linear_dense_total": 4718592, "linear_dense_nnz": 728064}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1142784, "linear_attention_total": 2359296, "linear_attention_nnz": 331776, "linear_dense_total": 4718592, "linear_dense_nnz": 811008}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1165312, "linear_attention_total": 2359296, "linear_attention_nnz": 411136, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 908032, "linear_attention_total": 2359296, "linear_attention_nnz": 319744, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 956672, "linear_attention_total": 2359296, "linear_attention_nnz": 457472, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 557056, "linear_attention_total": 2359296, "linear_attention_nnz": 246784, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 360192, "linear_attention_total": 2359296, "linear_attention_nnz": 252672, "linear_dense_total": 4718592, "linear_dense_nnz": 107520}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315904, "linear_attention_total": 2359296, "linear_attention_nnz": 202240, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 307712, "linear_attention_total": 2359296, "linear_attention_nnz": 129536, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}}, "total_sparsity": 68.91955939281638, "linear_sparsity": 88.36052035108025}, "speed": {"eval_elapsed_time": 14.372085305862129}, "opt_eval_metrics": {"exact_match": 76.33869441816462, "f1": 84.90005817955239}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50669453, "linear_total": 84934656, "linear_nnz": 26755584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3454464, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1979904}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2108928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3497472, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2317824}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2995200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 2110464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2832384, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 2045952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2528256, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 2036736}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1631232}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1691136, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1684992, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 374784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1082880, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}}, "total_sparsity": 53.46866515596302, "linear_sparsity": 68.4986255787037}, "speed": {"eval_elapsed_time": 15.184518757043406}, "opt_eval_metrics": {"exact_match": 78.12677388836329, "f1": 86.09062317714458}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 46753113, "linear_total": 84934656, "linear_nnz": 22841856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2904576, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1430016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2368512, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1582080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3084288, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1904640}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1715712}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1654272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096640, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1703424}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1910784, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1476096, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1082880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1534464, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 748032}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523776, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 327168}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 990720, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1107456, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 812544}}, "total_sparsity": 57.06516200196401, "linear_sparsity": 73.10655381944444}, "speed": {"eval_elapsed_time": 13.856825530063361}, "opt_eval_metrics": {"exact_match": 78.31598864711448, "f1": 86.14732314693939}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 46580969, "linear_total": 84934656, "linear_nnz": 22669824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2883072, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1408512}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2342400, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1555968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3055104, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1875456}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2585088, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1700352}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225664, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1635840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2082816, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1901568, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1311744}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1468416, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1075200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1528320, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 741888}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520704, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 324096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1093632, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}}, "total_sparsity": 57.22324719197764, "linear_sparsity": 73.30910011574075}, "speed": {"eval_elapsed_time": 13.743963541928679}, "opt_eval_metrics": {"exact_match": 77.96594134342479, "f1": 85.85795020085484}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 46293486, "linear_total": 84934656, "linear_nnz": 22382592, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850816, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1376256}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2323968, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1537536}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3022848, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1843200}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2557440, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1672704}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1620480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2068992, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1675776}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790976, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1299456}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1462272, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1523712, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 737280}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 322560}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 970752, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 479232}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 786432}}, "total_sparsity": 57.487251773494805, "linear_sparsity": 73.6472800925926}, "speed": {"eval_elapsed_time": 13.430293028010055}, "opt_eval_metrics": {"exact_match": 77.88079470198676, "f1": 85.81326419854291}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 26853628, "linear_total": 84934656, "linear_nnz": 2895098, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 357857, "linear_attention_total": 2359296, "linear_attention_nnz": 13195, "linear_dense_total": 4718592, "linear_dense_nnz": 344662}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 405482, "linear_attention_total": 2359296, "linear_attention_nnz": 53357, "linear_dense_total": 4718592, "linear_dense_nnz": 352125}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 395119, "linear_attention_total": 2359296, "linear_attention_nnz": 43981, "linear_dense_total": 4718592, "linear_dense_nnz": 351138}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 394117, "linear_attention_total": 2359296, "linear_attention_nnz": 71058, "linear_dense_total": 4718592, "linear_dense_nnz": 323059}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 335373, "linear_attention_total": 2359296, "linear_attention_nnz": 47705, "linear_dense_total": 4718592, "linear_dense_nnz": 287668}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 292526, "linear_attention_total": 2359296, "linear_attention_nnz": 40348, "linear_dense_total": 4718592, "linear_dense_nnz": 252178}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 238114, "linear_attention_total": 2359296, "linear_attention_nnz": 33002, "linear_dense_total": 4718592, "linear_dense_nnz": 205112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 188891, "linear_attention_total": 2359296, "linear_attention_nnz": 38753, "linear_dense_total": 4718592, "linear_dense_nnz": 150138}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 123365, "linear_attention_total": 2359296, "linear_attention_nnz": 22052, "linear_dense_total": 4718592, "linear_dense_nnz": 101313}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 64415, "linear_attention_total": 2359296, "linear_attention_nnz": 28498, "linear_dense_total": 4718592, "linear_dense_nnz": 35917}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 53470, "linear_attention_total": 2359296, "linear_attention_nnz": 18747, "linear_dense_total": 4718592, "linear_dense_nnz": 34723}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 46369, "linear_attention_total": 2359296, "linear_attention_nnz": 15957, "linear_dense_total": 4718592, "linear_dense_nnz": 30412}}, "total_sparsity": 75.33947808267818, "linear_sparsity": 96.59138196780358}, "speed": {"eval_elapsed_time": 25.971711199032143}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.40699359564026}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 40641026, "linear_total": 84934656, "linear_nnz": 16682496, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1930752, "linear_attention_total": 2359296, "linear_attention_nnz": 1390080, "linear_dense_total": 4718592, "linear_dense_nnz": 540672}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1347840, "linear_attention_total": 2359296, "linear_attention_nnz": 622848, "linear_dense_total": 4718592, "linear_dense_nnz": 724992}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2423808, "linear_attention_total": 2359296, "linear_attention_nnz": 1506816, "linear_dense_total": 4718592, "linear_dense_nnz": 916992}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1864704, "linear_attention_total": 2359296, "linear_attention_nnz": 966144, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1956096, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 990720}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742592, "linear_attention_total": 2359296, "linear_attention_nnz": 734976, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1323264, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 761856}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835584, "linear_attention_total": 2359296, "linear_attention_nnz": 282624, "linear_dense_total": 4718592, "linear_dense_nnz": 552960}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307904, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 403968}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 536064, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 506880, "linear_attention_total": 2359296, "linear_attention_nnz": 322560, "linear_dense_total": 4718592, "linear_dense_nnz": 184320}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 761088, "linear_attention_total": 2359296, "linear_attention_nnz": 412416, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}}, "total_sparsity": 62.67808162027695, "linear_sparsity": 80.3584346064815}, "speed": {"eval_elapsed_time": 13.440584641881287}, "opt_eval_metrics": {"exact_match": 76.13055818353831, "f1": 84.59415607632204}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 39460610, "linear_total": 84934656, "linear_nnz": 15502080, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1801728, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1127424, "linear_attention_total": 2359296, "linear_attention_nnz": 471552, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2341632, "linear_attention_total": 2359296, "linear_attention_nnz": 1507584, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1804032, "linear_attention_total": 2359296, "linear_attention_nnz": 960768, "linear_dense_total": 4718592, "linear_dense_nnz": 843264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899264, "linear_attention_total": 2359296, "linear_attention_nnz": 968448, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1529088, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 930816}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 564480, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738048, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 506880}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277952, "linear_attention_total": 2359296, "linear_attention_nnz": 893952, "linear_dense_total": 4718592, "linear_dense_nnz": 384000}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 668928, "linear_attention_total": 2359296, "linear_attention_nnz": 535296, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 319488, "linear_dense_total": 4718592, "linear_dense_nnz": 168960}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 560640, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 63.76209435179903, "linear_sparsity": 81.7482277199074}, "speed": {"eval_elapsed_time": 13.018812068970874}, "opt_eval_metrics": {"exact_match": 76.20624408703878, "f1": 84.78885528858153}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 39496706, "linear_total": 84934656, "linear_nnz": 15538176, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798656, "linear_attention_total": 2359296, "linear_attention_nnz": 1331712, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1122816, "linear_attention_total": 2359296, "linear_attention_nnz": 473088, "linear_dense_total": 4718592, "linear_dense_nnz": 649728}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2325504, "linear_attention_total": 2359296, "linear_attention_nnz": 1500672, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1790208, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 834048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1886976, "linear_attention_total": 2359296, "linear_attention_nnz": 963840, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1522944, "linear_attention_total": 2359296, "linear_attention_nnz": 598272, "linear_dense_total": 4718592, "linear_dense_nnz": 924672}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 558336, "linear_dense_total": 4718592, "linear_dense_nnz": 700416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 737280, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1286400, "linear_attention_total": 2359296, "linear_attention_nnz": 903936, "linear_dense_total": 4718592, "linear_dense_nnz": 382464}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 665856, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 417792, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 559104, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 301056}}, "total_sparsity": 63.72894627217538, "linear_sparsity": 81.70572916666666}, "speed": {"eval_elapsed_time": 12.96851964481175}, "opt_eval_metrics": {"exact_match": 75.67644276253547, "f1": 84.4740049617883}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39419906, "linear_total": 84934656, "linear_nnz": 15461376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1800192, "linear_attention_total": 2359296, "linear_attention_nnz": 1334784, "linear_dense_total": 4718592, "linear_dense_nnz": 465408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1118976, "linear_attention_total": 2359296, "linear_attention_nnz": 473856, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2320896, "linear_attention_total": 2359296, "linear_attention_nnz": 1497600, "linear_dense_total": 4718592, "linear_dense_nnz": 823296}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1788672, "linear_attention_total": 2359296, "linear_attention_nnz": 956160, "linear_dense_total": 4718592, "linear_dense_nnz": 832512}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1891584, "linear_attention_total": 2359296, "linear_attention_nnz": 965376, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1520640, "linear_attention_total": 2359296, "linear_attention_nnz": 600576, "linear_dense_total": 4718592, "linear_dense_nnz": 920064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258752, "linear_attention_total": 2359296, "linear_attention_nnz": 561408, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 732672, "linear_attention_total": 2359296, "linear_attention_nnz": 230400, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1277184, "linear_attention_total": 2359296, "linear_attention_nnz": 897792, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 660480, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 132096}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 555264, "linear_attention_total": 2359296, "linear_attention_nnz": 257280, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.79947410116185, "linear_sparsity": 81.79615162037037}, "speed": {"eval_elapsed_time": 12.973318020114675}, "opt_eval_metrics": {"exact_match": 76.0170293282876, "f1": 84.48208063503463}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 39382274, "linear_total": 84934656, "linear_nnz": 15423744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1793280, "linear_attention_total": 2359296, "linear_attention_nnz": 1323264, "linear_dense_total": 4718592, "linear_dense_nnz": 470016}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115136, "linear_attention_total": 2359296, "linear_attention_nnz": 470016, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2321664, "linear_attention_total": 2359296, "linear_attention_nnz": 1496832, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1789440, "linear_attention_total": 2359296, "linear_attention_nnz": 960000, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 917760, "linear_dense_total": 4718592, "linear_dense_nnz": 926208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526016, "linear_attention_total": 2359296, "linear_attention_nnz": 607488, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264896, "linear_attention_total": 2359296, "linear_attention_nnz": 567552, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 730368, "linear_attention_total": 2359296, "linear_attention_nnz": 231168, "linear_dense_total": 4718592, "linear_dense_nnz": 499200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1279488, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 379392}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 667392, "linear_attention_total": 2359296, "linear_attention_nnz": 533760, "linear_dense_total": 4718592, "linear_dense_nnz": 133632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 536064, "linear_attention_total": 2359296, "linear_attention_nnz": 370176, "linear_dense_total": 4718592, "linear_dense_nnz": 165888}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 556032, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 297984}}, "total_sparsity": 63.83403273736522, "linear_sparsity": 81.84045862268519}, "speed": {"eval_elapsed_time": 12.801363392965868}, "opt_eval_metrics": {"exact_match": 75.93188268684958, "f1": 84.50981123274157}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": {"stats": {"total": 108893186, "nnz": 45409666, "linear_total": 84934656, "linear_nnz": 21492736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1715200, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 1234944}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895424, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 1495040}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3012608, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 2417664}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3094528, "linear_attention_total": 2359296, "linear_attention_nnz": 813056, "linear_dense_total": 4718592, "linear_dense_nnz": 2281472}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2762752, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 2163712}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2707456, "linear_attention_total": 2359296, "linear_attention_nnz": 562176, "linear_dense_total": 4718592, "linear_dense_nnz": 2145280}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2233344, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 1701888}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1741824, "linear_attention_total": 2359296, "linear_attention_nnz": 678912, "linear_dense_total": 4718592, "linear_dense_nnz": 1062912}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 709632, "linear_attention_total": 2359296, "linear_attention_nnz": 338944, "linear_dense_total": 4718592, "linear_dense_nnz": 370688}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 524288, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 164864}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 593920, "linear_attention_total": 2359296, "linear_attention_nnz": 267264, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 501760, "linear_attention_total": 2359296, "linear_attention_nnz": 163840, "linear_dense_total": 4718592, "linear_dense_nnz": 337920}}, "total_sparsity": 58.29889117212532, "linear_sparsity": 74.6949749228395}, "speed": {"eval_elapsed_time": 18.650014573941007}, "opt_eval_metrics": {"exact_match": 77.01986754966887, "f1": 85.2617013700351}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 44308674, "linear_total": 84934656, "linear_nnz": 20392960, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1598464, "linear_attention_total": 2359296, "linear_attention_nnz": 458752, "linear_dense_total": 4718592, "linear_dense_nnz": 1139712}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1825792, "linear_attention_total": 2359296, "linear_attention_nnz": 398336, "linear_dense_total": 4718592, "linear_dense_nnz": 1427456}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2854912, "linear_attention_total": 2359296, "linear_attention_nnz": 596992, "linear_dense_total": 4718592, "linear_dense_nnz": 2257920}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2905088, "linear_attention_total": 2359296, "linear_attention_nnz": 781312, "linear_dense_total": 4718592, "linear_dense_nnz": 2123776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2643968, "linear_attention_total": 2359296, "linear_attention_nnz": 620544, "linear_dense_total": 4718592, "linear_dense_nnz": 2023424}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2543616, "linear_attention_total": 2359296, "linear_attention_nnz": 573440, "linear_dense_total": 4718592, "linear_dense_nnz": 1970176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2049024, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 1588224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1708032, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 1069056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 666624, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 359424}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 489472, "linear_attention_total": 2359296, "linear_attention_nnz": 327680, "linear_dense_total": 4718592, "linear_dense_nnz": 161792}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598016, "linear_attention_total": 2359296, "linear_attention_nnz": 271360, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 509952, "linear_attention_total": 2359296, "linear_attention_nnz": 162816, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}}, "total_sparsity": 59.309966373837206, "linear_sparsity": 75.98982445987654}, "speed": {"eval_elapsed_time": 18.39338346105069}, "opt_eval_metrics": {"exact_match": 76.98202459791864, "f1": 85.22056943761015}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 43172098, "linear_total": 84934656, "linear_nnz": 19257344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1469440, "linear_attention_total": 2359296, "linear_attention_nnz": 443392, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1692672, "linear_attention_total": 2359296, "linear_attention_nnz": 396288, "linear_dense_total": 4718592, "linear_dense_nnz": 1296384}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692096, "linear_attention_total": 2359296, "linear_attention_nnz": 578560, "linear_dense_total": 4718592, "linear_dense_nnz": 2113536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2728960, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1973248}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2531328, "linear_attention_total": 2359296, "linear_attention_nnz": 565248, "linear_dense_total": 4718592, "linear_dense_nnz": 1966080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2434048, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 1887232}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1978368, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 1502208}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638400, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1000448}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 620544, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 310272}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 457728, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 237568, "linear_dense_total": 4718592, "linear_dense_nnz": 308224}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 467968, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 315392}}, "total_sparsity": 60.35371946964616, "linear_sparsity": 77.3268711419753}, "speed": {"eval_elapsed_time": 17.99394460907206}, "opt_eval_metrics": {"exact_match": 76.92526017029329, "f1": 85.21713644985097}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42975330, "linear_total": 84934656, "linear_nnz": 19060736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1463296, "linear_attention_total": 2359296, "linear_attention_nnz": 455680, "linear_dense_total": 4718592, "linear_dense_nnz": 1007616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1699840, "linear_attention_total": 2359296, "linear_attention_nnz": 399360, "linear_dense_total": 4718592, "linear_dense_nnz": 1300480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2724864, "linear_attention_total": 2359296, "linear_attention_nnz": 544768, "linear_dense_total": 4718592, "linear_dense_nnz": 2180096}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2670592, "linear_attention_total": 2359296, "linear_attention_nnz": 731136, "linear_dense_total": 4718592, "linear_dense_nnz": 1939456}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2498560, "linear_attention_total": 2359296, "linear_attention_nnz": 557056, "linear_dense_total": 4718592, "linear_dense_nnz": 1941504}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2407424, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1928192, "linear_attention_total": 2359296, "linear_attention_nnz": 472064, "linear_dense_total": 4718592, "linear_dense_nnz": 1456128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 607232, "linear_dense_total": 4718592, "linear_dense_nnz": 977920}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 607232, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 317440}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455680, "linear_attention_total": 2359296, "linear_attention_nnz": 308224, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 545792, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 305152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 144384, "linear_dense_total": 4718592, "linear_dense_nnz": 329728}}, "total_sparsity": 60.53441764482857, "linear_sparsity": 77.55835262345678}, "speed": {"eval_elapsed_time": 17.949384653009474}, "opt_eval_metrics": {"exact_match": 77.08609271523179, "f1": 85.20287591064626}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 53955042, "linear_total": 84934656, "linear_nnz": 30029824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2392064, "linear_attention_total": 2359296, "linear_attention_nnz": 547840, "linear_dense_total": 4718592, "linear_dense_nnz": 1844224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2719744, "linear_attention_total": 2359296, "linear_attention_nnz": 546816, "linear_dense_total": 4718592, "linear_dense_nnz": 2172928}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3872768, "linear_attention_total": 2359296, "linear_attention_nnz": 675840, "linear_dense_total": 4718592, "linear_dense_nnz": 3196928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4077568, "linear_attention_total": 2359296, "linear_attention_nnz": 965632, "linear_dense_total": 4718592, "linear_dense_nnz": 3111936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4003840, "linear_attention_total": 2359296, "linear_attention_nnz": 896000, "linear_dense_total": 4718592, "linear_dense_nnz": 3107840}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3832832, "linear_attention_total": 2359296, "linear_attention_nnz": 696320, "linear_dense_total": 4718592, "linear_dense_nnz": 3136512}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3280896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 2525184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2510848, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1711104}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257472, "linear_attention_total": 2359296, "linear_attention_nnz": 509952, "linear_dense_total": 4718592, "linear_dense_nnz": 747520}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 681984, "linear_attention_total": 2359296, "linear_attention_nnz": 420864, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 748544, "linear_attention_total": 2359296, "linear_attention_nnz": 356352, "linear_dense_total": 4718592, "linear_dense_nnz": 392192}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 651264, "linear_attention_total": 2359296, "linear_attention_nnz": 217088, "linear_dense_total": 4718592, "linear_dense_nnz": 434176}}, "total_sparsity": 50.45140657377771, "linear_sparsity": 64.6436149691358}, "speed": {"eval_elapsed_time": 22.46348627889529}, "opt_eval_metrics": {"exact_match": 78.94985808893094, "f1": 86.768721062838}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 55275810, "linear_total": 84934656, "linear_nnz": 31358976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4016640, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 2542080}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3340800, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2652672}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4039680, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2860032}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4041216, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2763264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3187200, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 2499072}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2870784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 2477568}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2580480, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1990656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815552, "linear_attention_total": 2359296, "linear_attention_nnz": 245760, "linear_dense_total": 4718592, "linear_dense_nnz": 1569792}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1893888, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1107456}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 714240, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 517632}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1281024, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1577472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}}, "total_sparsity": 49.23850423478289, "linear_sparsity": 63.078703703703695}, "speed": {"eval_elapsed_time": 16.598785888869315}, "opt_eval_metrics": {"exact_match": 78.21192052980132, "f1": 86.01032921346379}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47626001, "linear_total": 84934656, "linear_nnz": 23714304, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2895360, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2330112, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1740288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3240960, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 2061312}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3095040, "linear_attention_total": 2359296, "linear_attention_nnz": 1179648, "linear_dense_total": 4718592, "linear_dense_nnz": 1915392}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2291712, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1800192}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1827840}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1861632, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1300992, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 1104384}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637376, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 582144, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1052160, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 560640}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1205760, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 910848}}, "total_sparsity": 56.2635617989908, "linear_sparsity": 72.07935474537037}, "speed": {"eval_elapsed_time": 13.948209983995184}, "opt_eval_metrics": {"exact_match": 77.78618732261117, "f1": 85.70556837897196}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 50726818, "linear_total": 84934656, "linear_nnz": 26803200, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2117632, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 1359872}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2221056, "linear_attention_total": 2359296, "linear_attention_nnz": 637952, "linear_dense_total": 4718592, "linear_dense_nnz": 1583104}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3328000, "linear_attention_total": 2359296, "linear_attention_nnz": 900096, "linear_dense_total": 4718592, "linear_dense_nnz": 2427904}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3734528, "linear_attention_total": 2359296, "linear_attention_nnz": 1215488, "linear_dense_total": 4718592, "linear_dense_nnz": 2519040}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3650560, "linear_attention_total": 2359296, "linear_attention_nnz": 1223680, "linear_dense_total": 4718592, "linear_dense_nnz": 2426880}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3445760, "linear_attention_total": 2359296, "linear_attention_nnz": 1035264, "linear_dense_total": 4718592, "linear_dense_nnz": 2410496}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2830336, "linear_attention_total": 2359296, "linear_attention_nnz": 1011712, "linear_dense_total": 4718592, "linear_dense_nnz": 1818624}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2111488, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 1195008}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1225728, "linear_attention_total": 2359296, "linear_attention_nnz": 860160, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 765952, "linear_attention_total": 2359296, "linear_attention_nnz": 603136, "linear_dense_total": 4718592, "linear_dense_nnz": 162816}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 745472, "linear_attention_total": 2359296, "linear_attention_nnz": 418816, "linear_dense_total": 4718592, "linear_dense_nnz": 326656}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 302080, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}}, "total_sparsity": 53.41598509203321, "linear_sparsity": 68.4425636574074}, "speed": {"eval_elapsed_time": 22.227510034106672}, "opt_eval_metrics": {"exact_match": 77.64427625354777, "f1": 85.9245488273656}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 45695714, "linear_total": 84934656, "linear_nnz": 21777408, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1549312, "linear_attention_total": 2359296, "linear_attention_nnz": 679936, "linear_dense_total": 4718592, "linear_dense_nnz": 869376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1868800, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 1269760}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2739200, "linear_attention_total": 2359296, "linear_attention_nnz": 875520, "linear_dense_total": 4718592, "linear_dense_nnz": 1863680}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3088384, "linear_attention_total": 2359296, "linear_attention_nnz": 1137664, "linear_dense_total": 4718592, "linear_dense_nnz": 1950720}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2821120, "linear_attention_total": 2359296, "linear_attention_nnz": 1033216, "linear_dense_total": 4718592, "linear_dense_nnz": 1787904}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2709504, "linear_attention_total": 2359296, "linear_attention_nnz": 850944, "linear_dense_total": 4718592, "linear_dense_nnz": 1858560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2225152, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1426432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1865728, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 987136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049600, "linear_attention_total": 2359296, "linear_attention_nnz": 782336, "linear_dense_total": 4718592, "linear_dense_nnz": 267264}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649216, "linear_attention_total": 2359296, "linear_attention_nnz": 504832, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 662528, "linear_attention_total": 2359296, "linear_attention_nnz": 379904, "linear_dense_total": 4718592, "linear_dense_nnz": 282624}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 548864, "linear_attention_total": 2359296, "linear_attention_nnz": 258048, "linear_dense_total": 4718592, "linear_dense_nnz": 290816}}, "total_sparsity": 58.036204395746125, "linear_sparsity": 74.35980902777779}, "speed": {"eval_elapsed_time": 20.075127677991986}, "opt_eval_metrics": {"exact_match": 77.92809839167455, "f1": 85.97854187426412}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 44413282, "linear_total": 84934656, "linear_nnz": 20496384, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1436672, "linear_attention_total": 2359296, "linear_attention_nnz": 647168, "linear_dense_total": 4718592, "linear_dense_nnz": 789504}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1798144, "linear_attention_total": 2359296, "linear_attention_nnz": 591872, "linear_dense_total": 4718592, "linear_dense_nnz": 1206272}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2583552, "linear_attention_total": 2359296, "linear_attention_nnz": 843776, "linear_dense_total": 4718592, "linear_dense_nnz": 1739776}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2975744, "linear_attention_total": 2359296, "linear_attention_nnz": 1118208, "linear_dense_total": 4718592, "linear_dense_nnz": 1857536}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2673664, "linear_attention_total": 2359296, "linear_attention_nnz": 913408, "linear_dense_total": 4718592, "linear_dense_nnz": 1760256}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2509824, "linear_attention_total": 2359296, "linear_attention_nnz": 791552, "linear_dense_total": 4718592, "linear_dense_nnz": 1718272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2085888, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 1330176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1731584, "linear_attention_total": 2359296, "linear_attention_nnz": 827392, "linear_dense_total": 4718592, "linear_dense_nnz": 904192}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 983040, "linear_attention_total": 2359296, "linear_attention_nnz": 726016, "linear_dense_total": 4718592, "linear_dense_nnz": 257024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 583680, "linear_attention_total": 2359296, "linear_attention_nnz": 464896, "linear_dense_total": 4718592, "linear_dense_nnz": 118784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 622592, "linear_attention_total": 2359296, "linear_attention_nnz": 359424, "linear_dense_total": 4718592, "linear_dense_nnz": 263168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 512000, "linear_attention_total": 2359296, "linear_attention_nnz": 240640, "linear_dense_total": 4718592, "linear_dense_nnz": 271360}}, "total_sparsity": 59.21390159343854, "linear_sparsity": 75.86805555555556}, "speed": {"eval_elapsed_time": 19.613351088017225}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.84893170709621}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 67469538, "linear_total": 84934656, "linear_nnz": 43535360, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4336640, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 3765248}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4451328, "linear_attention_total": 2359296, "linear_attention_nnz": 599040, "linear_dense_total": 4718592, "linear_dense_nnz": 3852288}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4783104, "linear_attention_total": 2359296, "linear_attention_nnz": 695296, "linear_dense_total": 4718592, "linear_dense_nnz": 4087808}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5047296, "linear_attention_total": 2359296, "linear_attention_nnz": 996352, "linear_dense_total": 4718592, "linear_dense_nnz": 4050944}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5032960, "linear_attention_total": 2359296, "linear_attention_nnz": 923648, "linear_dense_total": 4718592, "linear_dense_nnz": 4109312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4907008, "linear_attention_total": 2359296, "linear_attention_nnz": 865280, "linear_dense_total": 4718592, "linear_dense_nnz": 4041728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4636672, "linear_attention_total": 2359296, "linear_attention_nnz": 778240, "linear_dense_total": 4718592, "linear_dense_nnz": 3858432}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4243456, "linear_attention_total": 2359296, "linear_attention_nnz": 883712, "linear_dense_total": 4718592, "linear_dense_nnz": 3359744}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2818048, "linear_attention_total": 2359296, "linear_attention_nnz": 513024, "linear_dense_total": 4718592, "linear_dense_nnz": 2305024}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1289216, "linear_attention_total": 2359296, "linear_attention_nnz": 462848, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1047552, "linear_attention_total": 2359296, "linear_attention_nnz": 374784, "linear_dense_total": 4718592, "linear_dense_nnz": 672768}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 942080, "linear_attention_total": 2359296, "linear_attention_nnz": 235520, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}}, "total_sparsity": 38.04062450702838, "linear_sparsity": 48.742525077160494}, "speed": {"eval_elapsed_time": 26.131227070000023}, "opt_eval_metrics": {"exact_match": 79.67833491012298, "f1": 87.14623278516426}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 64400930, "linear_total": 84934656, "linear_nnz": 40469504, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3907584, "linear_attention_total": 2359296, "linear_attention_nnz": 527360, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4186112, "linear_attention_total": 2359296, "linear_attention_nnz": 524288, "linear_dense_total": 4718592, "linear_dense_nnz": 3661824}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4629504, "linear_attention_total": 2359296, "linear_attention_nnz": 598016, "linear_dense_total": 4718592, "linear_dense_nnz": 4031488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5038080, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 4107264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4822016, "linear_attention_total": 2359296, "linear_attention_nnz": 824320, "linear_dense_total": 4718592, "linear_dense_nnz": 3997696}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4773888, "linear_attention_total": 2359296, "linear_attention_nnz": 746496, "linear_dense_total": 4718592, "linear_dense_nnz": 4027392}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4408320, "linear_attention_total": 2359296, "linear_attention_nnz": 670720, "linear_dense_total": 4718592, "linear_dense_nnz": 3737600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3789824, "linear_attention_total": 2359296, "linear_attention_nnz": 794624, "linear_dense_total": 4718592, "linear_dense_nnz": 2995200}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2176000, "linear_attention_total": 2359296, "linear_attention_nnz": 419840, "linear_dense_total": 4718592, "linear_dense_nnz": 1756160}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1011712, "linear_attention_total": 2359296, "linear_attention_nnz": 411648, "linear_dense_total": 4718592, "linear_dense_nnz": 600064}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 903168, "linear_attention_total": 2359296, "linear_attention_nnz": 307200, "linear_dense_total": 4718592, "linear_dense_nnz": 595968}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823296, "linear_attention_total": 2359296, "linear_attention_nnz": 207872, "linear_dense_total": 4718592, "linear_dense_nnz": 615424}}, "total_sparsity": 40.85862268737366, "linear_sparsity": 52.35218942901234}, "speed": {"eval_elapsed_time": 25.23966666101478}, "opt_eval_metrics": {"exact_match": 79.29990539262063, "f1": 87.09851869948527}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 64383586, "linear_total": 84934656, "linear_nnz": 40452096, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3881984, "linear_attention_total": 2359296, "linear_attention_nnz": 501760, "linear_dense_total": 4718592, "linear_dense_nnz": 3380224}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4185088, "linear_attention_total": 2359296, "linear_attention_nnz": 528384, "linear_dense_total": 4718592, "linear_dense_nnz": 3656704}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4703232, "linear_attention_total": 2359296, "linear_attention_nnz": 581632, "linear_dense_total": 4718592, "linear_dense_nnz": 4121600}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5060608, "linear_attention_total": 2359296, "linear_attention_nnz": 916480, "linear_dense_total": 4718592, "linear_dense_nnz": 4144128}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4893696, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 4060160}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4817920, "linear_attention_total": 2359296, "linear_attention_nnz": 741376, "linear_dense_total": 4718592, "linear_dense_nnz": 4076544}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4459520, "linear_attention_total": 2359296, "linear_attention_nnz": 644096, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3720192, "linear_attention_total": 2359296, "linear_attention_nnz": 757760, "linear_dense_total": 4718592, "linear_dense_nnz": 2962432}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2070528, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 1689600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 966656, "linear_attention_total": 2359296, "linear_attention_nnz": 395264, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 874496, "linear_attention_total": 2359296, "linear_attention_nnz": 313344, "linear_dense_total": 4718592, "linear_dense_nnz": 561152}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 818176, "linear_attention_total": 2359296, "linear_attention_nnz": 200704, "linear_dense_total": 4718592, "linear_dense_nnz": 617472}}, "total_sparsity": 40.874550222086434, "linear_sparsity": 52.37268518518518}, "speed": {"eval_elapsed_time": 25.169638738036156}, "opt_eval_metrics": {"exact_match": 79.22421948912014, "f1": 87.0664817371684}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 41117954, "linear_total": 84934656, "linear_nnz": 17159424, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1879296, "linear_attention_total": 2359296, "linear_attention_nnz": 1459968, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1487616, "linear_attention_total": 2359296, "linear_attention_nnz": 930048, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2451456, "linear_attention_total": 2359296, "linear_attention_nnz": 1651200, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959168, "linear_attention_total": 2359296, "linear_attention_nnz": 1181952, "linear_dense_total": 4718592, "linear_dense_nnz": 777216}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1876992, "linear_attention_total": 2359296, "linear_attention_nnz": 996864, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1606656, "linear_attention_total": 2359296, "linear_attention_nnz": 720384, "linear_dense_total": 4718592, "linear_dense_nnz": 886272}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1242624, "linear_attention_total": 2359296, "linear_attention_nnz": 595968, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1026048, "linear_attention_total": 2359296, "linear_attention_nnz": 531456, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1362432, "linear_attention_total": 2359296, "linear_attention_nnz": 1029120, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 784128, "linear_attention_total": 2359296, "linear_attention_nnz": 673536, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563712, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.240103802270966, "linear_sparsity": 79.7969111689815}, "speed": {"eval_elapsed_time": 14.573690482182428}, "opt_eval_metrics": {"exact_match": 78.00378429517502, "f1": 85.86131877012127}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41037314, "linear_total": 84934656, "linear_nnz": 17078784, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1881600, "linear_attention_total": 2359296, "linear_attention_nnz": 1460736, "linear_dense_total": 4718592, "linear_dense_nnz": 420864}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 930816, "linear_dense_total": 4718592, "linear_dense_nnz": 557568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2430720, "linear_attention_total": 2359296, "linear_attention_nnz": 1636608, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1953024, "linear_attention_total": 2359296, "linear_attention_nnz": 1172736, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822464, "linear_attention_total": 2359296, "linear_attention_nnz": 946944, "linear_dense_total": 4718592, "linear_dense_nnz": 875520}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602816, "linear_attention_total": 2359296, "linear_attention_nnz": 719616, "linear_dense_total": 4718592, "linear_dense_nnz": 883200}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1248768, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1023744, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 493056}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360128, "linear_attention_total": 2359296, "linear_attention_nnz": 1026816, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 785664, "linear_attention_total": 2359296, "linear_attention_nnz": 675072, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562176, "linear_attention_total": 2359296, "linear_attention_nnz": 413184, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 919296, "linear_attention_total": 2359296, "linear_attention_nnz": 658176, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 62.31415802270676, "linear_sparsity": 79.89185474537037}, "speed": {"eval_elapsed_time": 14.54654596094042}, "opt_eval_metrics": {"exact_match": 78.04162724692526, "f1": 85.89832211406967}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 70240546, "linear_total": 84934656, "linear_nnz": 46302208, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4364288, "linear_attention_total": 2359296, "linear_attention_nnz": 770048, "linear_dense_total": 4718592, "linear_dense_nnz": 3594240}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4529152, "linear_attention_total": 2359296, "linear_attention_nnz": 724992, "linear_dense_total": 4718592, "linear_dense_nnz": 3804160}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5151744, "linear_attention_total": 2359296, "linear_attention_nnz": 1142784, "linear_dense_total": 4718592, "linear_dense_nnz": 4008960}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5341184, "linear_attention_total": 2359296, "linear_attention_nnz": 1333248, "linear_dense_total": 4718592, "linear_dense_nnz": 4007936}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5542912, "linear_attention_total": 2359296, "linear_attention_nnz": 1481728, "linear_dense_total": 4718592, "linear_dense_nnz": 4061184}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5168128, "linear_attention_total": 2359296, "linear_attention_nnz": 1220608, "linear_dense_total": 4718592, "linear_dense_nnz": 3947520}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5190656, "linear_attention_total": 2359296, "linear_attention_nnz": 1311744, "linear_dense_total": 4718592, "linear_dense_nnz": 3878912}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4362240, "linear_attention_total": 2359296, "linear_attention_nnz": 1070080, "linear_dense_total": 4718592, "linear_dense_nnz": 3292160}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2987008, "linear_attention_total": 2359296, "linear_attention_nnz": 1006592, "linear_dense_total": 4718592, "linear_dense_nnz": 1980416}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437696, "linear_attention_total": 2359296, "linear_attention_nnz": 684032, "linear_dense_total": 4718592, "linear_dense_nnz": 753664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193984, "linear_attention_total": 2359296, "linear_attention_nnz": 571392, "linear_dense_total": 4718592, "linear_dense_nnz": 622592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1033216, "linear_attention_total": 2359296, "linear_attention_nnz": 368640, "linear_dense_total": 4718592, "linear_dense_nnz": 664576}}, "total_sparsity": 35.49592166400568, "linear_sparsity": 45.48490547839506}, "speed": {"eval_elapsed_time": 28.841393386013806}, "opt_eval_metrics": {"exact_match": 80.58656575212866, "f1": 87.97635235966065}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 65744386, "linear_total": 84934656, "linear_nnz": 41809920, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3775488, "linear_attention_total": 2359296, "linear_attention_nnz": 634880, "linear_dense_total": 4718592, "linear_dense_nnz": 3140608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4079616, "linear_attention_total": 2359296, "linear_attention_nnz": 602112, "linear_dense_total": 4718592, "linear_dense_nnz": 3477504}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4937728, "linear_attention_total": 2359296, "linear_attention_nnz": 1008640, "linear_dense_total": 4718592, "linear_dense_nnz": 3929088}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5286912, "linear_attention_total": 2359296, "linear_attention_nnz": 1197056, "linear_dense_total": 4718592, "linear_dense_nnz": 4089856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5135360, "linear_attention_total": 2359296, "linear_attention_nnz": 1181696, "linear_dense_total": 4718592, "linear_dense_nnz": 3953664}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 5012480, "linear_attention_total": 2359296, "linear_attention_nnz": 1005568, "linear_dense_total": 4718592, "linear_dense_nnz": 4006912}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4720640, "linear_attention_total": 2359296, "linear_attention_nnz": 1043456, "linear_dense_total": 4718592, "linear_dense_nnz": 3677184}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3708928, "linear_attention_total": 2359296, "linear_attention_nnz": 931840, "linear_dense_total": 4718592, "linear_dense_nnz": 2777088}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2311168, "linear_attention_total": 2359296, "linear_attention_nnz": 862208, "linear_dense_total": 4718592, "linear_dense_nnz": 1448960}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1058816, "linear_attention_total": 2359296, "linear_attention_nnz": 600064, "linear_dense_total": 4718592, "linear_dense_nnz": 458752}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 951296, "linear_attention_total": 2359296, "linear_attention_nnz": 456704, "linear_dense_total": 4718592, "linear_dense_nnz": 494592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 831488, "linear_attention_total": 2359296, "linear_attention_nnz": 289792, "linear_dense_total": 4718592, "linear_dense_nnz": 541696}}, "total_sparsity": 39.6248852522324, "linear_sparsity": 50.774016203703695}, "speed": {"eval_elapsed_time": 27.04506094707176}, "opt_eval_metrics": {"exact_match": 80.48249763481552, "f1": 87.91705961229685}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 35.92588178999722}, "opt_eval_metrics": {"exact_match": 80.22705771050141, "f1": 88.08154392563726}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 35.89134427602403}, "opt_eval_metrics": {"exact_match": 80.53926206244087, "f1": 88.07603620459668}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 45252556, "linear_total": 84934656, "linear_nnz": 21294026, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2152743, "linear_attention_total": 2359296, "linear_attention_nnz": 158912, "linear_dense_total": 4718592, "linear_dense_nnz": 1993831}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2265132, "linear_attention_total": 2359296, "linear_attention_nnz": 234395, "linear_dense_total": 4718592, "linear_dense_nnz": 2030737}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2415512, "linear_attention_total": 2359296, "linear_attention_nnz": 301048, "linear_dense_total": 4718592, "linear_dense_nnz": 2114464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2465567, "linear_attention_total": 2359296, "linear_attention_nnz": 358791, "linear_dense_total": 4718592, "linear_dense_nnz": 2106776}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2457267, "linear_attention_total": 2359296, "linear_attention_nnz": 398673, "linear_dense_total": 4718592, "linear_dense_nnz": 2058594}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2410577, "linear_attention_total": 2359296, "linear_attention_nnz": 367333, "linear_dense_total": 4718592, "linear_dense_nnz": 2043244}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2206780, "linear_attention_total": 2359296, "linear_attention_nnz": 344288, "linear_dense_total": 4718592, "linear_dense_nnz": 1862492}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1819031, "linear_attention_total": 2359296, "linear_attention_nnz": 304514, "linear_dense_total": 4718592, "linear_dense_nnz": 1514517}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1364821, "linear_attention_total": 2359296, "linear_attention_nnz": 265513, "linear_dense_total": 4718592, "linear_dense_nnz": 1099308}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 828990, "linear_attention_total": 2359296, "linear_attention_nnz": 201714, "linear_dense_total": 4718592, "linear_dense_nnz": 627276}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 574541, "linear_attention_total": 2359296, "linear_attention_nnz": 134277, "linear_dense_total": 4718592, "linear_dense_nnz": 440264}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 333065, "linear_attention_total": 2359296, "linear_attention_nnz": 63309, "linear_dense_total": 4718592, "linear_dense_nnz": 269756}}, "total_sparsity": 58.4431701722824, "linear_sparsity": 74.92893124804085}, "speed": {"eval_elapsed_time": 38.17887881118804}, "opt_eval_metrics": {"exact_match": 81.40018921475875, "f1": 88.66263407974378}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 11.982425803085789}, "opt_eval_metrics": {"exact_match": 75.42100283822138, "f1": 84.06741199196578}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 38467586, "linear_total": 84934656, "linear_nnz": 14509056, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1740288, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 314880}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 940032, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 448512}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1992192, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 615936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1728000, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1651200, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 717312}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245696, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1268736, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877056, "linear_attention_total": 2359296, "linear_attention_nnz": 442368, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1049088, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 262656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 629760, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 233472}}, "total_sparsity": 64.67401918059409, "linear_sparsity": 82.9173900462963}, "speed": {"eval_elapsed_time": 11.783776527037844}, "opt_eval_metrics": {"exact_match": 77.9848628192999, "f1": 85.88807770994393}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 38065154, "linear_total": 84934656, "linear_nnz": 14106624, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1669632, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 293376}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 913920, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 422400}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969152, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 592896}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 631296}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1559040, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1219584, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1257984, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 471552}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 955392, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1090560, "linear_attention_total": 2359296, "linear_attention_nnz": 835584, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 431616, "linear_attention_total": 2359296, "linear_attention_nnz": 344064, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708096, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.04358500448319, "linear_sparsity": 83.3912037037037}, "speed": {"eval_elapsed_time": 11.86458179494366}, "opt_eval_metrics": {"exact_match": 77.94701986754967, "f1": 85.90050035022541}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38095874, "linear_total": 84934656, "linear_nnz": 14137344, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1712640, "linear_attention_total": 2359296, "linear_attention_nnz": 1425408, "linear_dense_total": 4718592, "linear_dense_nnz": 287232}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907776, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 416256}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1967616, "linear_attention_total": 2359296, "linear_attention_nnz": 1376256, "linear_dense_total": 4718592, "linear_dense_nnz": 591360}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1711104, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1608192, "linear_attention_total": 2359296, "linear_attention_nnz": 933888, "linear_dense_total": 4718592, "linear_dense_nnz": 674304}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1214976, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 625152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1161216, "linear_attention_total": 2359296, "linear_attention_nnz": 688128, "linear_dense_total": 4718592, "linear_dense_nnz": 473088}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 953856, "linear_attention_total": 2359296, "linear_attention_nnz": 540672, "linear_dense_total": 4718592, "linear_dense_nnz": 413184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1041408, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 254976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 482304, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 757248, "linear_attention_total": 2359296, "linear_attention_nnz": 638976, "linear_dense_total": 4718592, "linear_dense_nnz": 118272}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 619008, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}}, "total_sparsity": 65.0153738728886, "linear_sparsity": 83.35503472222221}, "speed": {"eval_elapsed_time": 11.63978576194495}, "opt_eval_metrics": {"exact_match": 77.43614001892148, "f1": 85.51882546766822}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.59342699800618}, "opt_eval_metrics": {"exact_match": 76.82119205298014, "f1": 85.29329456078607}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.864284622017294}, "opt_eval_metrics": {"exact_match": 77.06717123935667, "f1": 85.28341140334766}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 49759613, "linear_total": 84934656, "linear_nnz": 25846272, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3251712, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2803200, "linear_attention_total": 2359296, "linear_attention_nnz": 884736, "linear_dense_total": 4718592, "linear_dense_nnz": 1918464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3320832, "linear_attention_total": 2359296, "linear_attention_nnz": 1081344, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3353088, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 2075136}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2469888, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1880064}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2322432, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1929216}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2098176, "linear_attention_total": 2359296, "linear_attention_nnz": 589824, "linear_dense_total": 4718592, "linear_dense_nnz": 1508352}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1641984, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1248768}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1638912, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1075200, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 583680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1304064, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 1009152}}, "total_sparsity": 54.304199529987116, "linear_sparsity": 69.56922743055556}, "speed": {"eval_elapsed_time": 14.753634401829913}, "opt_eval_metrics": {"exact_match": 77.8713339640492, "f1": 85.86552240887988}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 47529298, "linear_total": 84934656, "linear_nnz": 23617536, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2446848, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1660416}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2978304, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1995264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2216448, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1724928}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1824768, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1333248}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1526784, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1133568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35236717199184, "linear_sparsity": 72.19328703703704}, "speed": {"eval_elapsed_time": 14.023887678980827}, "opt_eval_metrics": {"exact_match": 78.06054872280038, "f1": 85.94002543374285}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 47521613, "linear_total": 84934656, "linear_nnz": 23609856, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2973696, "linear_attention_total": 2359296, "linear_attention_nnz": 1474560, "linear_dense_total": 4718592, "linear_dense_nnz": 1499136}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2445312, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 1658880}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2976768, "linear_attention_total": 2359296, "linear_attention_nnz": 983040, "linear_dense_total": 4718592, "linear_dense_nnz": 1993728}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3133440, "linear_attention_total": 2359296, "linear_attention_nnz": 1277952, "linear_dense_total": 4718592, "linear_dense_nnz": 1855488}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214912, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1723392}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2191872, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1798656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1823232, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1525248, "linear_attention_total": 2359296, "linear_attention_nnz": 393216, "linear_dense_total": 4718592, "linear_dense_nnz": 1132032}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1585152, "linear_attention_total": 2359296, "linear_attention_nnz": 786432, "linear_dense_total": 4718592, "linear_dense_nnz": 798720}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 542208, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1004544, "linear_attention_total": 2359296, "linear_attention_nnz": 491520, "linear_dense_total": 4718592, "linear_dense_nnz": 513024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1193472, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 898560}}, "total_sparsity": 56.35942454654601, "linear_sparsity": 72.2023292824074}, "speed": {"eval_elapsed_time": 14.008215571055189}, "opt_eval_metrics": {"exact_match": 78.10785241248817, "f1": 86.00835164251778}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35398714, "linear_total": 84934656, "linear_nnz": 11493376, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 907264, "linear_attention_total": 2359296, "linear_attention_nnz": 424960, "linear_dense_total": 4718592, "linear_dense_nnz": 482304}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1074176, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253376, "linear_attention_total": 2359296, "linear_attention_nnz": 402432, "linear_dense_total": 4718592, "linear_dense_nnz": 850944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1508352, "linear_attention_total": 2359296, "linear_attention_nnz": 681984, "linear_dense_total": 4718592, "linear_dense_nnz": 826368}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1328640, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 923136}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1422848, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 880128}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1094656, "linear_attention_total": 2359296, "linear_attention_nnz": 449536, "linear_dense_total": 4718592, "linear_dense_nnz": 645120}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1102848, "linear_attention_total": 2359296, "linear_attention_nnz": 577536, "linear_dense_total": 4718592, "linear_dense_nnz": 525312}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 628224, "linear_attention_total": 2359296, "linear_attention_nnz": 294912, "linear_dense_total": 4718592, "linear_dense_nnz": 333312}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 434176, "linear_attention_total": 2359296, "linear_attention_nnz": 320512, "linear_dense_total": 4718592, "linear_dense_nnz": 113664}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 377344, "linear_attention_total": 2359296, "linear_attention_nnz": 256000, "linear_dense_total": 4718592, "linear_dense_nnz": 121344}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 361472, "linear_attention_total": 2359296, "linear_attention_nnz": 146432, "linear_dense_total": 4718592, "linear_dense_nnz": 215040}}, "total_sparsity": 67.49225980035152, "linear_sparsity": 86.46797839506173}, "speed": {"eval_elapsed_time": 13.743516992079094}, "opt_eval_metrics": {"exact_match": 77.18070009460737, "f1": 85.6109462422114}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 25.03221789188683}, "opt_eval_metrics": {"exact_match": 79.64049195837275, "f1": 87.40026291426761}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 32.21901372191496}, "opt_eval_metrics": {"exact_match": 79.63103122043519, "f1": 87.14732125236388}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.130286294035614}, "opt_eval_metrics": {"exact_match": 79.15799432355723, "f1": 86.94169166073364}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.265998144168407}, "opt_eval_metrics": {"exact_match": 80.6244087038789, "f1": 88.07723643002453}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.391136547084898}, "opt_eval_metrics": {"exact_match": 73.90728476821192, "f1": 82.49310701115485}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.445335153024644}, "opt_eval_metrics": {"exact_match": 70.05676442762535, "f1": 79.26883508935717}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.755018649157137}, "opt_eval_metrics": {"exact_match": 79.25260170293284, "f1": 86.93528973939952}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.721035415073857}, "opt_eval_metrics": {"exact_match": 79.2620624408704, "f1": 86.97825692623259}}}, "base_speed_report": {"eval_elapsed_time": 38.708956059999764}} \ No newline at end of file diff --git a/analysis/files/results/results_back.json b/analysis/files/results/results_back.json deleted file mode 100644 index afbf993a..00000000 --- a/analysis/files/results/results_back.json +++ /dev/null @@ -1 +0,0 @@ -{"checkpoints": {"/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42067458, "linear_total": 84934656, "linear_nnz": 18108928, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1437184, "linear_attention_total": 2359296, "linear_attention_nnz": 472576, "linear_dense_total": 4718592, "linear_dense_nnz": 964608}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1754624, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015488, "linear_attention_total": 2359296, "linear_attention_nnz": 634624, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2276608, "linear_attention_total": 2359296, "linear_attention_nnz": 951040, "linear_dense_total": 4718592, "linear_dense_nnz": 1325568}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2280448, "linear_attention_total": 2359296, "linear_attention_nnz": 861184, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2123008, "linear_attention_total": 2359296, "linear_attention_nnz": 779008, "linear_dense_total": 4718592, "linear_dense_nnz": 1344000}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1841152, "linear_attention_total": 2359296, "linear_attention_nnz": 799744, "linear_dense_total": 4718592, "linear_dense_nnz": 1041408}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553664, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1042432, "linear_attention_total": 2359296, "linear_attention_nnz": 610816, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 584960, "linear_attention_total": 2359296, "linear_attention_nnz": 405248, "linear_dense_total": 4718592, "linear_dense_nnz": 179712}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 540928, "linear_attention_total": 2359296, "linear_attention_nnz": 395008, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 658432, "linear_attention_total": 2359296, "linear_attention_nnz": 217600, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}}, "total_sparsity": 61.3681447432349, "linear_sparsity": 78.6789882330247}, "speed": {"eval_elapsed_time": 18.076128184970003}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41735426, "linear_total": 84934656, "linear_nnz": 17776896, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1405440, "linear_attention_total": 2359296, "linear_attention_nnz": 476160, "linear_dense_total": 4718592, "linear_dense_nnz": 929280}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1732352, "linear_attention_total": 2359296, "linear_attention_nnz": 589568, "linear_dense_total": 4718592, "linear_dense_nnz": 1142784}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1979136, "linear_attention_total": 2359296, "linear_attention_nnz": 628992, "linear_dense_total": 4718592, "linear_dense_nnz": 1350144}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2218752, "linear_attention_total": 2359296, "linear_attention_nnz": 913152, "linear_dense_total": 4718592, "linear_dense_nnz": 1305600}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2257664, "linear_attention_total": 2359296, "linear_attention_nnz": 850688, "linear_dense_total": 4718592, "linear_dense_nnz": 1406976}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2096384, "linear_attention_total": 2359296, "linear_attention_nnz": 764672, "linear_dense_total": 4718592, "linear_dense_nnz": 1331712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1786112, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 1022976}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1538816, "linear_attention_total": 2359296, "linear_attention_nnz": 781568, "linear_dense_total": 4718592, "linear_dense_nnz": 757248}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1027840, "linear_attention_total": 2359296, "linear_attention_nnz": 596224, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 571392, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 176640}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 523008, "linear_attention_total": 2359296, "linear_attention_nnz": 378624, "linear_dense_total": 4718592, "linear_dense_nnz": 144384}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640000, "linear_attention_total": 2359296, "linear_attention_nnz": 208384, "linear_dense_total": 4718592, "linear_dense_nnz": 431616}}, "total_sparsity": 61.67306005721974, "linear_sparsity": 79.0699146412037}, "speed": {"eval_elapsed_time": 17.46750119898934}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 43462146, "linear_total": 84934656, "linear_nnz": 19503616, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1660672, "linear_attention_total": 2359296, "linear_attention_nnz": 579328, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1899776, "linear_attention_total": 2359296, "linear_attention_nnz": 632576, "linear_dense_total": 4718592, "linear_dense_nnz": 1267200}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2031104, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1446912}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2544128, "linear_attention_total": 2359296, "linear_attention_nnz": 1049600, "linear_dense_total": 4718592, "linear_dense_nnz": 1494528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2395904, "linear_attention_total": 2359296, "linear_attention_nnz": 916736, "linear_dense_total": 4718592, "linear_dense_nnz": 1479168}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2184960, "linear_attention_total": 2359296, "linear_attention_nnz": 790272, "linear_dense_total": 4718592, "linear_dense_nnz": 1394688}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1912320, "linear_attention_total": 2359296, "linear_attention_nnz": 798720, "linear_dense_total": 4718592, "linear_dense_nnz": 1113600}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1806336, "linear_attention_total": 2359296, "linear_attention_nnz": 969216, "linear_dense_total": 4718592, "linear_dense_nnz": 837120}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 471808, "linear_dense_total": 4718592, "linear_dense_nnz": 497664}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 717312, "linear_attention_total": 2359296, "linear_attention_nnz": 505344, "linear_dense_total": 4718592, "linear_dense_nnz": 211968}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 631040, "linear_attention_total": 2359296, "linear_attention_nnz": 448256, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 750592, "linear_attention_total": 2359296, "linear_attention_nnz": 288256, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}}, "total_sparsity": 60.08735936884057, "linear_sparsity": 77.03691647376543}, "speed": {"eval_elapsed_time": 15.94129539799178}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42678018, "linear_total": 84934656, "linear_nnz": 18719488, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1560320, "linear_attention_total": 2359296, "linear_attention_nnz": 543488, "linear_dense_total": 4718592, "linear_dense_nnz": 1016832}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1816320, "linear_attention_total": 2359296, "linear_attention_nnz": 593664, "linear_dense_total": 4718592, "linear_dense_nnz": 1222656}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2024704, "linear_attention_total": 2359296, "linear_attention_nnz": 603904, "linear_dense_total": 4718592, "linear_dense_nnz": 1420800}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2329856, "linear_attention_total": 2359296, "linear_attention_nnz": 870656, "linear_dense_total": 4718592, "linear_dense_nnz": 1459200}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2332928, "linear_attention_total": 2359296, "linear_attention_nnz": 887552, "linear_dense_total": 4718592, "linear_dense_nnz": 1445376}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 720640, "linear_dense_total": 4718592, "linear_dense_nnz": 1370112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1887744, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 1081344}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742080, "linear_attention_total": 2359296, "linear_attention_nnz": 926464, "linear_dense_total": 4718592, "linear_dense_nnz": 815616}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 944384, "linear_attention_total": 2359296, "linear_attention_nnz": 455936, "linear_dense_total": 4718592, "linear_dense_nnz": 488448}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 705280, "linear_attention_total": 2359296, "linear_attention_nnz": 505600, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 587264, "linear_attention_total": 2359296, "linear_attention_nnz": 409088, "linear_dense_total": 4718592, "linear_dense_nnz": 178176}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 697856, "linear_attention_total": 2359296, "linear_attention_nnz": 250880, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}}, "total_sparsity": 60.80744850279245, "linear_sparsity": 77.96012972608024}, "speed": {"eval_elapsed_time": 15.653043513011653}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 46192898, "linear_total": 84934656, "linear_nnz": 22234368, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2090752, "linear_attention_total": 2359296, "linear_attention_nnz": 551680, "linear_dense_total": 4718592, "linear_dense_nnz": 1539072}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2278656, "linear_attention_total": 2359296, "linear_attention_nnz": 596736, "linear_dense_total": 4718592, "linear_dense_nnz": 1681920}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2418688, "linear_attention_total": 2359296, "linear_attention_nnz": 567808, "linear_dense_total": 4718592, "linear_dense_nnz": 1850880}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2844416, "linear_attention_total": 2359296, "linear_attention_nnz": 1002752, "linear_dense_total": 4718592, "linear_dense_nnz": 1841664}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2691072, "linear_attention_total": 2359296, "linear_attention_nnz": 878592, "linear_dense_total": 4718592, "linear_dense_nnz": 1812480}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2475264, "linear_attention_total": 2359296, "linear_attention_nnz": 721152, "linear_dense_total": 4718592, "linear_dense_nnz": 1754112}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2229248, "linear_attention_total": 2359296, "linear_attention_nnz": 805376, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1966336, "linear_attention_total": 2359296, "linear_attention_nnz": 892672, "linear_dense_total": 4718592, "linear_dense_nnz": 1073664}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1081344, "linear_attention_total": 2359296, "linear_attention_nnz": 460800, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 701440, "linear_attention_total": 2359296, "linear_attention_nnz": 454144, "linear_dense_total": 4718592, "linear_dense_nnz": 247296}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 598272, "linear_attention_total": 2359296, "linear_attention_nnz": 361728, "linear_dense_total": 4718592, "linear_dense_nnz": 236544}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 858880, "linear_attention_total": 2359296, "linear_attention_nnz": 238336, "linear_dense_total": 4718592, "linear_dense_nnz": 620544}}, "total_sparsity": 57.57962486284496, "linear_sparsity": 73.82179542824075}, "speed": {"eval_elapsed_time": 17.350134194013663}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 39741442, "linear_total": 84934656, "linear_nnz": 15782912, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1171968, "linear_attention_total": 2359296, "linear_attention_nnz": 511488, "linear_dense_total": 4718592, "linear_dense_nnz": 660480}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1531136, "linear_attention_total": 2359296, "linear_attention_nnz": 591104, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1722112, "linear_attention_total": 2359296, "linear_attention_nnz": 656128, "linear_dense_total": 4718592, "linear_dense_nnz": 1065984}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2136320, "linear_attention_total": 2359296, "linear_attention_nnz": 985856, "linear_dense_total": 4718592, "linear_dense_nnz": 1150464}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2026752, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1168896}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1822976, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 1138176}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1488384, "linear_attention_total": 2359296, "linear_attention_nnz": 668160, "linear_dense_total": 4718592, "linear_dense_nnz": 820224}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1423104, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 629760}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861184, "linear_attention_total": 2359296, "linear_attention_nnz": 494080, "linear_dense_total": 4718592, "linear_dense_nnz": 367104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 570880, "linear_attention_total": 2359296, "linear_attention_nnz": 417280, "linear_dense_total": 4718592, "linear_dense_nnz": 153600}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499968, "linear_attention_total": 2359296, "linear_attention_nnz": 370944, "linear_dense_total": 4718592, "linear_dense_nnz": 129024}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 528128, "linear_attention_total": 2359296, "linear_attention_nnz": 224000, "linear_dense_total": 4718592, "linear_dense_nnz": 304128}}, "total_sparsity": 63.504197590471826, "linear_sparsity": 81.41758294753086}, "speed": {"eval_elapsed_time": 15.804350501974113}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": {"stats": {"total": 108893186, "nnz": 38778370, "linear_total": 84934656, "linear_nnz": 14819840, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1050624, "linear_attention_total": 2359296, "linear_attention_nnz": 488448, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 512512, "linear_dense_total": 4718592, "linear_dense_nnz": 870912}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1628160, "linear_attention_total": 2359296, "linear_attention_nnz": 628224, "linear_dense_total": 4718592, "linear_dense_nnz": 999936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1998592, "linear_attention_total": 2359296, "linear_attention_nnz": 937216, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1939968, "linear_attention_total": 2359296, "linear_attention_nnz": 821760, "linear_dense_total": 4718592, "linear_dense_nnz": 1118208}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1709824, "linear_attention_total": 2359296, "linear_attention_nnz": 648448, "linear_dense_total": 4718592, "linear_dense_nnz": 1061376}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404928, "linear_attention_total": 2359296, "linear_attention_nnz": 641536, "linear_dense_total": 4718592, "linear_dense_nnz": 763392}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1360896, "linear_attention_total": 2359296, "linear_attention_nnz": 755712, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817920, "linear_attention_total": 2359296, "linear_attention_nnz": 467712, "linear_dense_total": 4718592, "linear_dense_nnz": 350208}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 544512, "linear_attention_total": 2359296, "linear_attention_nnz": 403200, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484096, "linear_attention_total": 2359296, "linear_attention_nnz": 367360, "linear_dense_total": 4718592, "linear_dense_nnz": 116736}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 496896, "linear_attention_total": 2359296, "linear_attention_nnz": 225024, "linear_dense_total": 4718592, "linear_dense_nnz": 271872}}, "total_sparsity": 64.38861656596218, "linear_sparsity": 82.5514805169753}, "speed": {"eval_elapsed_time": 15.662423020985443}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": {"stats": {"total": 108893186, "nnz": 38293506, "linear_total": 84934656, "linear_nnz": 14334976, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1010688, "linear_attention_total": 2359296, "linear_attention_nnz": 468480, "linear_dense_total": 4718592, "linear_dense_nnz": 542208}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1371392, "linear_attention_total": 2359296, "linear_attention_nnz": 518912, "linear_dense_total": 4718592, "linear_dense_nnz": 852480}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1590272, "linear_attention_total": 2359296, "linear_attention_nnz": 608768, "linear_dense_total": 4718592, "linear_dense_nnz": 981504}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1895936, "linear_attention_total": 2359296, "linear_attention_nnz": 869888, "linear_dense_total": 4718592, "linear_dense_nnz": 1026048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1869568, "linear_attention_total": 2359296, "linear_attention_nnz": 775936, "linear_dense_total": 4718592, "linear_dense_nnz": 1093632}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1663232, "linear_attention_total": 2359296, "linear_attention_nnz": 618752, "linear_dense_total": 4718592, "linear_dense_nnz": 1044480}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383424, "linear_attention_total": 2359296, "linear_attention_nnz": 629248, "linear_dense_total": 4718592, "linear_dense_nnz": 754176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1295872, "linear_attention_total": 2359296, "linear_attention_nnz": 707584, "linear_dense_total": 4718592, "linear_dense_nnz": 588288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 808704, "linear_attention_total": 2359296, "linear_attention_nnz": 463104, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 515840, "linear_attention_total": 2359296, "linear_attention_nnz": 376064, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 455936, "linear_attention_total": 2359296, "linear_attention_nnz": 345344, "linear_dense_total": 4718592, "linear_dense_nnz": 110592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 474112, "linear_attention_total": 2359296, "linear_attention_nnz": 212992, "linear_dense_total": 4718592, "linear_dense_nnz": 261120}}, "total_sparsity": 64.83388225963009, "linear_sparsity": 83.1223476080247}, "speed": {"eval_elapsed_time": 15.62424924300285}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 38916354, "linear_total": 84934656, "linear_nnz": 14957824, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1209344, "linear_attention_total": 2359296, "linear_attention_nnz": 459776, "linear_dense_total": 4718592, "linear_dense_nnz": 749568}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1494272, "linear_attention_total": 2359296, "linear_attention_nnz": 488192, "linear_dense_total": 4718592, "linear_dense_nnz": 1006080}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1636096, "linear_attention_total": 2359296, "linear_attention_nnz": 550144, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969664, "linear_attention_total": 2359296, "linear_attention_nnz": 868352, "linear_dense_total": 4718592, "linear_dense_nnz": 1101312}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1746944, "linear_attention_total": 2359296, "linear_attention_nnz": 548864, "linear_dense_total": 4718592, "linear_dense_nnz": 1198080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1782272, "linear_attention_total": 2359296, "linear_attention_nnz": 653312, "linear_dense_total": 4718592, "linear_dense_nnz": 1128960}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1461760, "linear_attention_total": 2359296, "linear_attention_nnz": 593920, "linear_dense_total": 4718592, "linear_dense_nnz": 867840}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1391616, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 669696}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 754688, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 387072}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531968, "linear_attention_total": 2359296, "linear_attention_nnz": 373760, "linear_dense_total": 4718592, "linear_dense_nnz": 158208}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460032, "linear_attention_total": 2359296, "linear_attention_nnz": 311040, "linear_dense_total": 4718592, "linear_dense_nnz": 148992}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 519168, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}}, "total_sparsity": 64.26190156654981, "linear_sparsity": 82.38902150848766}, "speed": {"eval_elapsed_time": 15.315251532010734}}, "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": {"stats": {"total": 108893186, "nnz": 50872322, "linear_total": 84934656, "linear_nnz": 26913792, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2692352, "linear_attention_total": 2359296, "linear_attention_nnz": 684800, "linear_dense_total": 4718592, "linear_dense_nnz": 2007552}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2666496, "linear_attention_total": 2359296, "linear_attention_nnz": 646656, "linear_dense_total": 4718592, "linear_dense_nnz": 2019840}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2931200, "linear_attention_total": 2359296, "linear_attention_nnz": 691712, "linear_dense_total": 4718592, "linear_dense_nnz": 2239488}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3361024, "linear_attention_total": 2359296, "linear_attention_nnz": 1149184, "linear_dense_total": 4718592, "linear_dense_nnz": 2211840}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3165952, "linear_attention_total": 2359296, "linear_attention_nnz": 1007872, "linear_dense_total": 4718592, "linear_dense_nnz": 2158080}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070976, "linear_attention_total": 2359296, "linear_attention_nnz": 997376, "linear_dense_total": 4718592, "linear_dense_nnz": 2073600}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2644480, "linear_attention_total": 2359296, "linear_attention_nnz": 911872, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2248704, "linear_attention_total": 2359296, "linear_attention_nnz": 944640, "linear_dense_total": 4718592, "linear_dense_nnz": 1304064}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1514240, "linear_attention_total": 2359296, "linear_attention_nnz": 763136, "linear_dense_total": 4718592, "linear_dense_nnz": 751104}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 839424, "linear_attention_total": 2359296, "linear_attention_nnz": 526080, "linear_dense_total": 4718592, "linear_dense_nnz": 313344}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 707072, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 274944}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1071872, "linear_attention_total": 2359296, "linear_attention_nnz": 277760, "linear_dense_total": 4718592, "linear_dense_nnz": 794112}}, "total_sparsity": 53.282364242699266, "linear_sparsity": 68.31235532407408}, "speed": {"eval_elapsed_time": 19.643985862960108}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 33917936, "linear_total": 84934656, "linear_nnz": 9959406, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1111233, "linear_attention_total": 2359296, "linear_attention_nnz": 56754, "linear_dense_total": 4718592, "linear_dense_nnz": 1054479}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1222867, "linear_attention_total": 2359296, "linear_attention_nnz": 116764, "linear_dense_total": 4718592, "linear_dense_nnz": 1106103}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1264439, "linear_attention_total": 2359296, "linear_attention_nnz": 127558, "linear_dense_total": 4718592, "linear_dense_nnz": 1136881}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1270104, "linear_attention_total": 2359296, "linear_attention_nnz": 163709, "linear_dense_total": 4718592, "linear_dense_nnz": 1106395}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1202300, "linear_attention_total": 2359296, "linear_attention_nnz": 158018, "linear_dense_total": 4718592, "linear_dense_nnz": 1044282}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1136195, "linear_attention_total": 2359296, "linear_attention_nnz": 125746, "linear_dense_total": 4718592, "linear_dense_nnz": 1010449}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 971117, "linear_attention_total": 2359296, "linear_attention_nnz": 110023, "linear_dense_total": 4718592, "linear_dense_nnz": 861094}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746075, "linear_attention_total": 2359296, "linear_attention_nnz": 113086, "linear_dense_total": 4718592, "linear_dense_nnz": 632989}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488971, "linear_attention_total": 2359296, "linear_attention_nnz": 81879, "linear_dense_total": 4718592, "linear_dense_nnz": 407092}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 250695, "linear_attention_total": 2359296, "linear_attention_nnz": 77365, "linear_dense_total": 4718592, "linear_dense_nnz": 173330}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 172793, "linear_attention_total": 2359296, "linear_attention_nnz": 50915, "linear_dense_total": 4718592, "linear_dense_nnz": 121878}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 122617, "linear_attention_total": 2359296, "linear_attention_nnz": 28303, "linear_dense_total": 4718592, "linear_dense_nnz": 94314}}, "total_sparsity": 68.85210429971255, "linear_sparsity": 88.27403739646628}, "speed": {"eval_elapsed_time": 75.02001089300029}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 33825359, "linear_total": 84934656, "linear_nnz": 9866829, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1100628, "linear_attention_total": 2359296, "linear_attention_nnz": 56086, "linear_dense_total": 4718592, "linear_dense_nnz": 1044542}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1211778, "linear_attention_total": 2359296, "linear_attention_nnz": 115328, "linear_dense_total": 4718592, "linear_dense_nnz": 1096450}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1253069, "linear_attention_total": 2359296, "linear_attention_nnz": 125881, "linear_dense_total": 4718592, "linear_dense_nnz": 1127188}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1258511, "linear_attention_total": 2359296, "linear_attention_nnz": 161525, "linear_dense_total": 4718592, "linear_dense_nnz": 1096986}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1191705, "linear_attention_total": 2359296, "linear_attention_nnz": 155911, "linear_dense_total": 4718592, "linear_dense_nnz": 1035794}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1125428, "linear_attention_total": 2359296, "linear_attention_nnz": 123921, "linear_dense_total": 4718592, "linear_dense_nnz": 1001507}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 961919, "linear_attention_total": 2359296, "linear_attention_nnz": 108430, "linear_dense_total": 4718592, "linear_dense_nnz": 853489}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738628, "linear_attention_total": 2359296, "linear_attention_nnz": 111505, "linear_dense_total": 4718592, "linear_dense_nnz": 627123}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 484188, "linear_attention_total": 2359296, "linear_attention_nnz": 80805, "linear_dense_total": 4718592, "linear_dense_nnz": 403383}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 247948, "linear_attention_total": 2359296, "linear_attention_nnz": 76456, "linear_dense_total": 4718592, "linear_dense_nnz": 171492}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 171235, "linear_attention_total": 2359296, "linear_attention_nnz": 50374, "linear_dense_total": 4718592, "linear_dense_nnz": 120861}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 121792, "linear_attention_total": 2359296, "linear_attention_nnz": 28038, "linear_dense_total": 4718592, "linear_dense_nnz": 93754}}, "total_sparsity": 68.93712063856779, "linear_sparsity": 88.38303530657733}, "speed": {"eval_elapsed_time": 75.69579442497343}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 36773378, "linear_total": 84934656, "linear_nnz": 12814848, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1044480, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 605184}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1177088, "linear_attention_total": 2359296, "linear_attention_nnz": 367616, "linear_dense_total": 4718592, "linear_dense_nnz": 809472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1450496, "linear_attention_total": 2359296, "linear_attention_nnz": 492032, "linear_dense_total": 4718592, "linear_dense_nnz": 958464}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1652224, "linear_attention_total": 2359296, "linear_attention_nnz": 733696, "linear_dense_total": 4718592, "linear_dense_nnz": 918528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1511680, "linear_attention_total": 2359296, "linear_attention_nnz": 461056, "linear_dense_total": 4718592, "linear_dense_nnz": 1050624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1533952, "linear_attention_total": 2359296, "linear_attention_nnz": 580096, "linear_dense_total": 4718592, "linear_dense_nnz": 953856}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1227520, "linear_attention_total": 2359296, "linear_attention_nnz": 462592, "linear_dense_total": 4718592, "linear_dense_nnz": 764928}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 624384, "linear_dense_total": 4718592, "linear_dense_nnz": 571392}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 700416, "linear_attention_total": 2359296, "linear_attention_nnz": 351744, "linear_dense_total": 4718592, "linear_dense_nnz": 348672}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 479744, "linear_attention_total": 2359296, "linear_attention_nnz": 339968, "linear_dense_total": 4718592, "linear_dense_nnz": 139776}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 411392, "linear_attention_total": 2359296, "linear_attention_nnz": 276224, "linear_dense_total": 4718592, "linear_dense_nnz": 135168}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 430080, "linear_attention_total": 2359296, "linear_attention_nnz": 178176, "linear_dense_total": 4718592, "linear_dense_nnz": 251904}}, "total_sparsity": 66.22986308803564, "linear_sparsity": 84.912109375}, "speed": {"eval_elapsed_time": 14.328767778992187}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-55000": {"stats": {"total": 108893186, "nnz": 46385410, "linear_total": 84934656, "linear_nnz": 22426880, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2004992, "linear_attention_total": 2359296, "linear_attention_nnz": 594944, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2177280, "linear_attention_total": 2359296, "linear_attention_nnz": 672000, "linear_dense_total": 4718592, "linear_dense_nnz": 1505280}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2592256, "linear_attention_total": 2359296, "linear_attention_nnz": 859648, "linear_dense_total": 4718592, "linear_dense_nnz": 1732608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2838016, "linear_attention_total": 2359296, "linear_attention_nnz": 1172992, "linear_dense_total": 4718592, "linear_dense_nnz": 1665024}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2712832, "linear_attention_total": 2359296, "linear_attention_nnz": 1026304, "linear_dense_total": 4718592, "linear_dense_nnz": 1686528}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2600448, "linear_attention_total": 2359296, "linear_attention_nnz": 976896, "linear_dense_total": 4718592, "linear_dense_nnz": 1623552}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2246144, "linear_attention_total": 2359296, "linear_attention_nnz": 955904, "linear_dense_total": 4718592, "linear_dense_nnz": 1290240}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1842688, "linear_attention_total": 2359296, "linear_attention_nnz": 901120, "linear_dense_total": 4718592, "linear_dense_nnz": 941568}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1241856, "linear_attention_total": 2359296, "linear_attention_nnz": 718080, "linear_dense_total": 4718592, "linear_dense_nnz": 523776}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 693760, "linear_attention_total": 2359296, "linear_attention_nnz": 475648, "linear_dense_total": 4718592, "linear_dense_nnz": 218112}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 640768, "linear_attention_total": 2359296, "linear_attention_nnz": 441088, "linear_dense_total": 4718592, "linear_dense_nnz": 199680}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 835840, "linear_attention_total": 2359296, "linear_attention_nnz": 246016, "linear_dense_total": 4718592, "linear_dense_nnz": 589824}}, "total_sparsity": 57.4028351048522, "linear_sparsity": 73.59513647762346}, "speed": {"eval_elapsed_time": 19.36405121401185}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": {"stats": {"total": 108893186, "nnz": 43189250, "linear_total": 84934656, "linear_nnz": 19230720, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1584896, "linear_attention_total": 2359296, "linear_attention_nnz": 494336, "linear_dense_total": 4718592, "linear_dense_nnz": 1090560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1917184, "linear_attention_total": 2359296, "linear_attention_nnz": 631552, "linear_dense_total": 4718592, "linear_dense_nnz": 1285632}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2092032, "linear_attention_total": 2359296, "linear_attention_nnz": 648192, "linear_dense_total": 4718592, "linear_dense_nnz": 1443840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2466816, "linear_attention_total": 2359296, "linear_attention_nnz": 1047552, "linear_dense_total": 4718592, "linear_dense_nnz": 1419264}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2403328, "linear_attention_total": 2359296, "linear_attention_nnz": 942592, "linear_dense_total": 4718592, "linear_dense_nnz": 1460736}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2211072, "linear_attention_total": 2359296, "linear_attention_nnz": 837888, "linear_dense_total": 4718592, "linear_dense_nnz": 1373184}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1936640, "linear_attention_total": 2359296, "linear_attention_nnz": 841472, "linear_dense_total": 4718592, "linear_dense_nnz": 1095168}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661440, "linear_attention_total": 2359296, "linear_attention_nnz": 833536, "linear_dense_total": 4718592, "linear_dense_nnz": 827904}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1084160, "linear_attention_total": 2359296, "linear_attention_nnz": 621824, "linear_dense_total": 4718592, "linear_dense_nnz": 462336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621056, "linear_attention_total": 2359296, "linear_attention_nnz": 432128, "linear_dense_total": 4718592, "linear_dense_nnz": 188928}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 568064, "linear_attention_total": 2359296, "linear_attention_nnz": 411392, "linear_dense_total": 4718592, "linear_dense_nnz": 156672}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 684032, "linear_attention_total": 2359296, "linear_attention_nnz": 223232, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}}, "total_sparsity": 60.33796825450584, "linear_sparsity": 77.3582175925926}, "speed": {"eval_elapsed_time": 17.96685794304358}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": {"stats": {"total": 108893186, "nnz": 42070530, "linear_total": 84934656, "linear_nnz": 18112000, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1451008, "linear_attention_total": 2359296, "linear_attention_nnz": 480256, "linear_dense_total": 4718592, "linear_dense_nnz": 970752}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835264, "linear_attention_total": 2359296, "linear_attention_nnz": 620288, "linear_dense_total": 4718592, "linear_dense_nnz": 1214976}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2000384, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1374720}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2271232, "linear_attention_total": 2359296, "linear_attention_nnz": 933376, "linear_dense_total": 4718592, "linear_dense_nnz": 1337856}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2267904, "linear_attention_total": 2359296, "linear_attention_nnz": 862464, "linear_dense_total": 4718592, "linear_dense_nnz": 1405440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081536, "linear_attention_total": 2359296, "linear_attention_nnz": 783616, "linear_dense_total": 4718592, "linear_dense_nnz": 1297920}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1807104, "linear_attention_total": 2359296, "linear_attention_nnz": 773376, "linear_dense_total": 4718592, "linear_dense_nnz": 1033728}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1602048, "linear_attention_total": 2359296, "linear_attention_nnz": 811008, "linear_dense_total": 4718592, "linear_dense_nnz": 791040}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1009920, "linear_attention_total": 2359296, "linear_attention_nnz": 572160, "linear_dense_total": 4718592, "linear_dense_nnz": 437760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 592896, "linear_attention_total": 2359296, "linear_attention_nnz": 405504, "linear_dense_total": 4718592, "linear_dense_nnz": 187392}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 543232, "linear_attention_total": 2359296, "linear_attention_nnz": 392704, "linear_dense_total": 4718592, "linear_dense_nnz": 150528}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 649472, "linear_attention_total": 2359296, "linear_attention_nnz": 214784, "linear_dense_total": 4718592, "linear_dense_nnz": 434688}}, "total_sparsity": 61.365323630075444, "linear_sparsity": 78.67537133487654}, "speed": {"eval_elapsed_time": 17.98387801699573}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": {"stats": {"total": 108893186, "nnz": 41670402, "linear_total": 84934656, "linear_nnz": 17711872, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1409024, "linear_attention_total": 2359296, "linear_attention_nnz": 468992, "linear_dense_total": 4718592, "linear_dense_nnz": 940032}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1792000, "linear_attention_total": 2359296, "linear_attention_nnz": 606208, "linear_dense_total": 4718592, "linear_dense_nnz": 1185792}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1974272, "linear_attention_total": 2359296, "linear_attention_nnz": 625664, "linear_dense_total": 4718592, "linear_dense_nnz": 1348608}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2231552, "linear_attention_total": 2359296, "linear_attention_nnz": 910592, "linear_dense_total": 4718592, "linear_dense_nnz": 1320960}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2209536, "linear_attention_total": 2359296, "linear_attention_nnz": 828672, "linear_dense_total": 4718592, "linear_dense_nnz": 1380864}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2046464, "linear_attention_total": 2359296, "linear_attention_nnz": 765440, "linear_dense_total": 4718592, "linear_dense_nnz": 1281024}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1764096, "linear_attention_total": 2359296, "linear_attention_nnz": 761088, "linear_dense_total": 4718592, "linear_dense_nnz": 1003008}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1573120, "linear_attention_total": 2359296, "linear_attention_nnz": 792832, "linear_dense_total": 4718592, "linear_dense_nnz": 780288}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 986880, "linear_attention_total": 2359296, "linear_attention_nnz": 553728, "linear_dense_total": 4718592, "linear_dense_nnz": 433152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 572672, "linear_attention_total": 2359296, "linear_attention_nnz": 389888, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 525568, "linear_attention_total": 2359296, "linear_attention_nnz": 378112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 626688, "linear_attention_total": 2359296, "linear_attention_nnz": 207360, "linear_dense_total": 4718592, "linear_dense_nnz": 419328}}, "total_sparsity": 61.73277361909495, "linear_sparsity": 79.14647231867285}, "speed": {"eval_elapsed_time": 17.518095910025295}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 41478658, "linear_total": 84934656, "linear_nnz": 17520128, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1404160, "linear_attention_total": 2359296, "linear_attention_nnz": 465664, "linear_dense_total": 4718592, "linear_dense_nnz": 938496}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1766912, "linear_attention_total": 2359296, "linear_attention_nnz": 584192, "linear_dense_total": 4718592, "linear_dense_nnz": 1182720}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1961216, "linear_attention_total": 2359296, "linear_attention_nnz": 615680, "linear_dense_total": 4718592, "linear_dense_nnz": 1345536}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2210304, "linear_attention_total": 2359296, "linear_attention_nnz": 895488, "linear_dense_total": 4718592, "linear_dense_nnz": 1314816}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2189824, "linear_attention_total": 2359296, "linear_attention_nnz": 812032, "linear_dense_total": 4718592, "linear_dense_nnz": 1377792}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2038016, "linear_attention_total": 2359296, "linear_attention_nnz": 755456, "linear_dense_total": 4718592, "linear_dense_nnz": 1282560}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1738240, "linear_attention_total": 2359296, "linear_attention_nnz": 739840, "linear_dense_total": 4718592, "linear_dense_nnz": 998400}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1571584, "linear_attention_total": 2359296, "linear_attention_nnz": 797440, "linear_dense_total": 4718592, "linear_dense_nnz": 774144}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 943872, "linear_attention_total": 2359296, "linear_attention_nnz": 513792, "linear_dense_total": 4718592, "linear_dense_nnz": 430080}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 563968, "linear_attention_total": 2359296, "linear_attention_nnz": 381184, "linear_dense_total": 4718592, "linear_dense_nnz": 182784}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 516352, "linear_attention_total": 2359296, "linear_attention_nnz": 370432, "linear_dense_total": 4718592, "linear_dense_nnz": 145920}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 615680, "linear_attention_total": 2359296, "linear_attention_nnz": 200960, "linear_dense_total": 4718592, "linear_dense_nnz": 414720}}, "total_sparsity": 61.90885809879785, "linear_sparsity": 79.37222704475309}, "speed": {"eval_elapsed_time": 17.52969163004309}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 40218943, "linear_total": 84934656, "linear_nnz": 16260413, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725694, "linear_attention_total": 2359296, "linear_attention_nnz": 36794, "linear_dense_total": 4718592, "linear_dense_nnz": 1688900}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1959620, "linear_attention_total": 2359296, "linear_attention_nnz": 233028, "linear_dense_total": 4718592, "linear_dense_nnz": 1726592}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1969125, "linear_attention_total": 2359296, "linear_attention_nnz": 194318, "linear_dense_total": 4718592, "linear_dense_nnz": 1774807}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2012358, "linear_attention_total": 2359296, "linear_attention_nnz": 270153, "linear_dense_total": 4718592, "linear_dense_nnz": 1742205}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1860862, "linear_attention_total": 2359296, "linear_attention_nnz": 207935, "linear_dense_total": 4718592, "linear_dense_nnz": 1652927}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815188, "linear_attention_total": 2359296, "linear_attention_nnz": 215427, "linear_dense_total": 4718592, "linear_dense_nnz": 1599761}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1518978, "linear_attention_total": 2359296, "linear_attention_nnz": 114563, "linear_dense_total": 4718592, "linear_dense_nnz": 1404415}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1307646, "linear_attention_total": 2359296, "linear_attention_nnz": 165011, "linear_dense_total": 4718592, "linear_dense_nnz": 1142635}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 946142, "linear_attention_total": 2359296, "linear_attention_nnz": 86589, "linear_dense_total": 4718592, "linear_dense_nnz": 859553}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 531809, "linear_attention_total": 2359296, "linear_attention_nnz": 110020, "linear_dense_total": 4718592, "linear_dense_nnz": 421789}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 419075, "linear_attention_total": 2359296, "linear_attention_nnz": 89475, "linear_dense_total": 4718592, "linear_dense_nnz": 329600}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 193916, "linear_attention_total": 2359296, "linear_attention_nnz": 45791, "linear_dense_total": 4718592, "linear_dense_nnz": 148125}}, "total_sparsity": 63.065693568741764, "linear_sparsity": 80.85538487375518}, "speed": {"eval_elapsed_time": 59.936431092966814}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": {"stats": {"total": 108893186, "nnz": 30451970, "linear_total": 84934656, "linear_nnz": 6493440, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 490240, "linear_attention_total": 2359296, "linear_attention_nnz": 259840, "linear_dense_total": 4718592, "linear_dense_nnz": 230400}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 591104, "linear_attention_total": 2359296, "linear_attention_nnz": 225536, "linear_dense_total": 4718592, "linear_dense_nnz": 365568}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 770048, "linear_attention_total": 2359296, "linear_attention_nnz": 286208, "linear_dense_total": 4718592, "linear_dense_nnz": 483840}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 863488, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 450048}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 747008, "linear_attention_total": 2359296, "linear_attention_nnz": 214016, "linear_dense_total": 4718592, "linear_dense_nnz": 532992}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 763392, "linear_attention_total": 2359296, "linear_attention_nnz": 285696, "linear_dense_total": 4718592, "linear_dense_nnz": 477696}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 499456, "linear_attention_total": 2359296, "linear_attention_nnz": 113920, "linear_dense_total": 4718592, "linear_dense_nnz": 385536}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 650752, "linear_attention_total": 2359296, "linear_attention_nnz": 303616, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 407808, "linear_attention_total": 2359296, "linear_attention_nnz": 162048, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 293888, "linear_attention_total": 2359296, "linear_attention_nnz": 206336, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 206336, "linear_attention_total": 2359296, "linear_attention_nnz": 117248, "linear_dense_total": 4718592, "linear_dense_nnz": 89088}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 209920, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 105984}}, "total_sparsity": 72.03500869191208, "linear_sparsity": 92.35478153935185}, "speed": {"eval_elapsed_time": 11.021364552958403}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 30144002, "linear_total": 84934656, "linear_nnz": 6185472, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 462848, "linear_attention_total": 2359296, "linear_attention_nnz": 237056, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566784, "linear_attention_total": 2359296, "linear_attention_nnz": 219648, "linear_dense_total": 4718592, "linear_dense_nnz": 347136}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 744960, "linear_attention_total": 2359296, "linear_attention_nnz": 278016, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 823552, "linear_attention_total": 2359296, "linear_attention_nnz": 379648, "linear_dense_total": 4718592, "linear_dense_nnz": 443904}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 708352, "linear_attention_total": 2359296, "linear_attention_nnz": 193792, "linear_dense_total": 4718592, "linear_dense_nnz": 514560}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 703488, "linear_attention_total": 2359296, "linear_attention_nnz": 247296, "linear_dense_total": 4718592, "linear_dense_nnz": 456192}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 488448, "linear_attention_total": 2359296, "linear_attention_nnz": 118272, "linear_dense_total": 4718592, "linear_dense_nnz": 370176}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 636160, "linear_attention_total": 2359296, "linear_attention_nnz": 296704, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 393728, "linear_attention_total": 2359296, "linear_attention_nnz": 152576, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 278528, "linear_attention_total": 2359296, "linear_attention_nnz": 190976, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190976, "linear_attention_total": 2359296, "linear_attention_nnz": 104960, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 187648, "linear_attention_total": 2359296, "linear_attention_nnz": 90880, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.31782528614784, "linear_sparsity": 92.7173755787037}, "speed": {"eval_elapsed_time": 10.957513606990688}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 30105858, "linear_total": 84934656, "linear_nnz": 6147328, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 460800, "linear_attention_total": 2359296, "linear_attention_nnz": 235008, "linear_dense_total": 4718592, "linear_dense_nnz": 225792}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 566272, "linear_attention_total": 2359296, "linear_attention_nnz": 220672, "linear_dense_total": 4718592, "linear_dense_nnz": 345600}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727040, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 466944}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 821760, "linear_attention_total": 2359296, "linear_attention_nnz": 380928, "linear_dense_total": 4718592, "linear_dense_nnz": 440832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 718080, "linear_attention_total": 2359296, "linear_attention_nnz": 215808, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 706048, "linear_attention_total": 2359296, "linear_attention_nnz": 251392, "linear_dense_total": 4718592, "linear_dense_nnz": 454656}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 475648, "linear_attention_total": 2359296, "linear_attention_nnz": 103936, "linear_dense_total": 4718592, "linear_dense_nnz": 371712}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 624384, "linear_attention_total": 2359296, "linear_attention_nnz": 284928, "linear_dense_total": 4718592, "linear_dense_nnz": 339456}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 388608, "linear_attention_total": 2359296, "linear_attention_nnz": 147456, "linear_dense_total": 4718592, "linear_dense_nnz": 241152}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 268800, "linear_attention_total": 2359296, "linear_attention_nnz": 181248, "linear_dense_total": 4718592, "linear_dense_nnz": 87552}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 199168, "linear_attention_total": 2359296, "linear_attention_nnz": 113152, "linear_dense_total": 4718592, "linear_dense_nnz": 86016}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 190720, "linear_attention_total": 2359296, "linear_attention_nnz": 93952, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}}, "total_sparsity": 72.35285410787779, "linear_sparsity": 92.76228539737654}, "speed": {"eval_elapsed_time": 10.933026321989018}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": {"stats": {"total": 108893186, "nnz": 38110440, "linear_total": 84934656, "linear_nnz": 14151910, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1521793, "linear_attention_total": 2359296, "linear_attention_nnz": 87221, "linear_dense_total": 4718592, "linear_dense_nnz": 1434572}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637844, "linear_attention_total": 2359296, "linear_attention_nnz": 157517, "linear_dense_total": 4718592, "linear_dense_nnz": 1480327}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1723746, "linear_attention_total": 2359296, "linear_attention_nnz": 188172, "linear_dense_total": 4718592, "linear_dense_nnz": 1535574}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1742961, "linear_attention_total": 2359296, "linear_attention_nnz": 230341, "linear_dense_total": 4718592, "linear_dense_nnz": 1512620}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1687428, "linear_attention_total": 2359296, "linear_attention_nnz": 240387, "linear_dense_total": 4718592, "linear_dense_nnz": 1447041}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1623377, "linear_attention_total": 2359296, "linear_attention_nnz": 195780, "linear_dense_total": 4718592, "linear_dense_nnz": 1427597}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1429982, "linear_attention_total": 2359296, "linear_attention_nnz": 184963, "linear_dense_total": 4718592, "linear_dense_nnz": 1245019}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130199, "linear_attention_total": 2359296, "linear_attention_nnz": 172954, "linear_dense_total": 4718592, "linear_dense_nnz": 957245}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 773896, "linear_attention_total": 2359296, "linear_attention_nnz": 138133, "linear_dense_total": 4718592, "linear_dense_nnz": 635763}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 417863, "linear_attention_total": 2359296, "linear_attention_nnz": 112972, "linear_dense_total": 4718592, "linear_dense_nnz": 304891}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279992, "linear_attention_total": 2359296, "linear_attention_nnz": 75446, "linear_dense_total": 4718592, "linear_dense_nnz": 204546}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 182829, "linear_attention_total": 2359296, "linear_attention_nnz": 38439, "linear_dense_total": 4718592, "linear_dense_nnz": 144390}}, "total_sparsity": 65.00199746198996, "linear_sparsity": 83.3378850677867}, "speed": {"eval_elapsed_time": 78.46566343901213, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 37366845, "linear_total": 84934656, "linear_nnz": 13408315, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1442154, "linear_attention_total": 2359296, "linear_attention_nnz": 79341, "linear_dense_total": 4718592, "linear_dense_nnz": 1362813}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1557975, "linear_attention_total": 2359296, "linear_attention_nnz": 146964, "linear_dense_total": 4718592, "linear_dense_nnz": 1411011}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1637409, "linear_attention_total": 2359296, "linear_attention_nnz": 173655, "linear_dense_total": 4718592, "linear_dense_nnz": 1463754}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1655712, "linear_attention_total": 2359296, "linear_attention_nnz": 213353, "linear_dense_total": 4718592, "linear_dense_nnz": 1442359}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1601748, "linear_attention_total": 2359296, "linear_attention_nnz": 221518, "linear_dense_total": 4718592, "linear_dense_nnz": 1380230}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1539647, "linear_attention_total": 2359296, "linear_attention_nnz": 179373, "linear_dense_total": 4718592, "linear_dense_nnz": 1360274}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1352289, "linear_attention_total": 2359296, "linear_attention_nnz": 168393, "linear_dense_total": 4718592, "linear_dense_nnz": 1183896}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1066215, "linear_attention_total": 2359296, "linear_attention_nnz": 159612, "linear_dense_total": 4718592, "linear_dense_nnz": 906603}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 727923, "linear_attention_total": 2359296, "linear_attention_nnz": 127230, "linear_dense_total": 4718592, "linear_dense_nnz": 600693}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 390947, "linear_attention_total": 2359296, "linear_attention_nnz": 105257, "linear_dense_total": 4718592, "linear_dense_nnz": 285690}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 262617, "linear_attention_total": 2359296, "linear_attention_nnz": 70746, "linear_dense_total": 4718592, "linear_dense_nnz": 191871}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 173679, "linear_attention_total": 2359296, "linear_attention_nnz": 36271, "linear_dense_total": 4718592, "linear_dense_nnz": 137408}}, "total_sparsity": 65.68486388119823, "linear_sparsity": 84.21337575088313}, "speed": {"eval_elapsed_time": 78.30115663801553, "optimize_mode": "block_sparse"}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 31863042, "linear_total": 84934656, "linear_nnz": 7904512, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 562688, "linear_attention_total": 2359296, "linear_attention_nnz": 260096, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852736, "linear_attention_total": 2359296, "linear_attention_nnz": 361216, "linear_dense_total": 4718592, "linear_dense_nnz": 491520}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 836352, "linear_attention_total": 2359296, "linear_attention_nnz": 249600, "linear_dense_total": 4718592, "linear_dense_nnz": 586752}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1037824, "linear_attention_total": 2359296, "linear_attention_nnz": 487936, "linear_dense_total": 4718592, "linear_dense_nnz": 549888}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 981760, "linear_attention_total": 2359296, "linear_attention_nnz": 315136, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 893184, "linear_attention_total": 2359296, "linear_attention_nnz": 329472, "linear_dense_total": 4718592, "linear_dense_nnz": 563712}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 621312, "linear_attention_total": 2359296, "linear_attention_nnz": 160512, "linear_dense_total": 4718592, "linear_dense_nnz": 460800}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 817664, "linear_attention_total": 2359296, "linear_attention_nnz": 407552, "linear_dense_total": 4718592, "linear_dense_nnz": 410112}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 446208, "linear_attention_total": 2359296, "linear_attention_nnz": 175872, "linear_dense_total": 4718592, "linear_dense_nnz": 270336}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 315392, "linear_attention_total": 2359296, "linear_attention_nnz": 218624, "linear_dense_total": 4718592, "linear_dense_nnz": 96768}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 279552, "linear_attention_total": 2359296, "linear_attention_nnz": 187392, "linear_dense_total": 4718592, "linear_dense_nnz": 92160}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 259840, "linear_attention_total": 2359296, "linear_attention_nnz": 118528, "linear_dense_total": 4718592, "linear_dense_nnz": 141312}}, "total_sparsity": 70.73917738066733, "linear_sparsity": 90.6934196566358}, "speed": {"eval_elapsed_time": 11.901203104003798}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 37879298, "linear_total": 84934656, "linear_nnz": 13920768, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1160960, "linear_attention_total": 2359296, "linear_attention_nnz": 454400, "linear_dense_total": 4718592, "linear_dense_nnz": 706560}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1245184, "linear_attention_total": 2359296, "linear_attention_nnz": 400384, "linear_dense_total": 4718592, "linear_dense_nnz": 844800}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1553408, "linear_attention_total": 2359296, "linear_attention_nnz": 518144, "linear_dense_total": 4718592, "linear_dense_nnz": 1035264}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1787648, "linear_attention_total": 2359296, "linear_attention_nnz": 803072, "linear_dense_total": 4718592, "linear_dense_nnz": 984576}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1640960, "linear_attention_total": 2359296, "linear_attention_nnz": 555008, "linear_dense_total": 4718592, "linear_dense_nnz": 1085952}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1661696, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1015296}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1383680, "linear_attention_total": 2359296, "linear_attention_nnz": 583424, "linear_dense_total": 4718592, "linear_dense_nnz": 800256}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1309440, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 657408}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 689664, "linear_attention_total": 2359296, "linear_attention_nnz": 333312, "linear_dense_total": 4718592, "linear_dense_nnz": 356352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 520960, "linear_attention_total": 2359296, "linear_attention_nnz": 382720, "linear_dense_total": 4718592, "linear_dense_nnz": 138240}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 461568, "linear_attention_total": 2359296, "linear_attention_nnz": 314112, "linear_dense_total": 4718592, "linear_dense_nnz": 147456}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 505600, "linear_attention_total": 2359296, "linear_attention_nnz": 203008, "linear_dense_total": 4718592, "linear_dense_nnz": 302592}}, "total_sparsity": 65.21426235063046, "linear_sparsity": 83.61002604166666}, "speed": {"eval_elapsed_time": 14.474253287015017}}, "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 35435778, "linear_total": 84934656, "linear_nnz": 11477248, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 887040, "linear_attention_total": 2359296, "linear_attention_nnz": 384768, "linear_dense_total": 4718592, "linear_dense_nnz": 502272}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1057792, "linear_attention_total": 2359296, "linear_attention_nnz": 355840, "linear_dense_total": 4718592, "linear_dense_nnz": 701952}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1285888, "linear_attention_total": 2359296, "linear_attention_nnz": 413440, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1497088, "linear_attention_total": 2359296, "linear_attention_nnz": 672256, "linear_dense_total": 4718592, "linear_dense_nnz": 824832}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1350912, "linear_attention_total": 2359296, "linear_attention_nnz": 418560, "linear_dense_total": 4718592, "linear_dense_nnz": 932352}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1395712, "linear_attention_total": 2359296, "linear_attention_nnz": 523264, "linear_dense_total": 4718592, "linear_dense_nnz": 872448}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1154816, "linear_attention_total": 2359296, "linear_attention_nnz": 498944, "linear_dense_total": 4718592, "linear_dense_nnz": 655872}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1059840, "linear_attention_total": 2359296, "linear_attention_nnz": 497664, "linear_dense_total": 4718592, "linear_dense_nnz": 562176}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 609024, "linear_attention_total": 2359296, "linear_attention_nnz": 297216, "linear_dense_total": 4718592, "linear_dense_nnz": 311808}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 436224, "linear_attention_total": 2359296, "linear_attention_nnz": 316416, "linear_dense_total": 4718592, "linear_dense_nnz": 119808}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371712, "linear_attention_total": 2359296, "linear_attention_nnz": 256512, "linear_dense_total": 4718592, "linear_dense_nnz": 115200}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 371200, "linear_attention_total": 2359296, "linear_attention_nnz": 150016, "linear_dense_total": 4718592, "linear_dense_nnz": 221184}}, "total_sparsity": 67.45822277621669, "linear_sparsity": 86.4869671103395}, "speed": {"eval_elapsed_time": 13.766221412981395}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 61067266, "linear_total": 84934656, "linear_nnz": 37108736, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3221504, "linear_attention_total": 2359296, "linear_attention_nnz": 614400, "linear_dense_total": 4718592, "linear_dense_nnz": 2607104}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3504128, "linear_attention_total": 2359296, "linear_attention_nnz": 604160, "linear_dense_total": 4718592, "linear_dense_nnz": 2899968}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4439040, "linear_attention_total": 2359296, "linear_attention_nnz": 730112, "linear_dense_total": 4718592, "linear_dense_nnz": 3708928}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4859904, "linear_attention_total": 2359296, "linear_attention_nnz": 1044480, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4734976, "linear_attention_total": 2359296, "linear_attention_nnz": 1012736, "linear_dense_total": 4718592, "linear_dense_nnz": 3722240}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4660224, "linear_attention_total": 2359296, "linear_attention_nnz": 882688, "linear_dense_total": 4718592, "linear_dense_nnz": 3777536}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4239360, "linear_attention_total": 2359296, "linear_attention_nnz": 980992, "linear_dense_total": 4718592, "linear_dense_nnz": 3258368}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3137536, "linear_attention_total": 2359296, "linear_attention_nnz": 903168, "linear_dense_total": 4718592, "linear_dense_nnz": 2234368}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1835008, "linear_attention_total": 2359296, "linear_attention_nnz": 710656, "linear_dense_total": 4718592, "linear_dense_nnz": 1124352}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 877568, "linear_attention_total": 2359296, "linear_attention_nnz": 552960, "linear_dense_total": 4718592, "linear_dense_nnz": 324608}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 852992, "linear_attention_total": 2359296, "linear_attention_nnz": 401408, "linear_dense_total": 4718592, "linear_dense_nnz": 451584}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 746496, "linear_attention_total": 2359296, "linear_attention_nnz": 244736, "linear_dense_total": 4718592, "linear_dense_nnz": 501760}}, "total_sparsity": 43.920030037508496, "linear_sparsity": 56.309076003086425}, "speed": {"eval_elapsed_time": 47.75363156700041}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-30000": {"stats": {"total": 108893186, "nnz": 67034114, "linear_total": 84934656, "linear_nnz": 43075584, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4692480, "linear_attention_total": 2359296, "linear_attention_nnz": 892416, "linear_dense_total": 4718592, "linear_dense_nnz": 3800064}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4379136, "linear_attention_total": 2359296, "linear_attention_nnz": 721920, "linear_dense_total": 4718592, "linear_dense_nnz": 3657216}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4735488, "linear_attention_total": 2359296, "linear_attention_nnz": 920064, "linear_dense_total": 4718592, "linear_dense_nnz": 3815424}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4850688, "linear_attention_total": 2359296, "linear_attention_nnz": 1052160, "linear_dense_total": 4718592, "linear_dense_nnz": 3798528}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4751616, "linear_attention_total": 2359296, "linear_attention_nnz": 1118976, "linear_dense_total": 4718592, "linear_dense_nnz": 3632640}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4547328, "linear_attention_total": 2359296, "linear_attention_nnz": 1017600, "linear_dense_total": 4718592, "linear_dense_nnz": 3529728}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 4306944, "linear_attention_total": 2359296, "linear_attention_nnz": 1061376, "linear_dense_total": 4718592, "linear_dense_nnz": 3245568}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3525888, "linear_attention_total": 2359296, "linear_attention_nnz": 793344, "linear_dense_total": 4718592, "linear_dense_nnz": 2732544}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2793216, "linear_attention_total": 2359296, "linear_attention_nnz": 919296, "linear_dense_total": 4718592, "linear_dense_nnz": 1873920}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1507584, "linear_attention_total": 2359296, "linear_attention_nnz": 541440, "linear_dense_total": 4718592, "linear_dense_nnz": 966144}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1130496, "linear_attention_total": 2359296, "linear_attention_nnz": 443904, "linear_dense_total": 4718592, "linear_dense_nnz": 686592}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1854720, "linear_attention_total": 2359296, "linear_attention_nnz": 332544, "linear_dense_total": 4718592, "linear_dense_nnz": 1522176}}, "total_sparsity": 38.44048791078626, "linear_sparsity": 49.283854166666664}, "speed": {"eval_elapsed_time": 32.06774970900733}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 43891202, "linear_total": 84934656, "linear_nnz": 19932672, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2045184, "linear_attention_total": 2359296, "linear_attention_nnz": 427776, "linear_dense_total": 4718592, "linear_dense_nnz": 1617408}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2102784, "linear_attention_total": 2359296, "linear_attention_nnz": 394752, "linear_dense_total": 4718592, "linear_dense_nnz": 1708032}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2424576, "linear_attention_total": 2359296, "linear_attention_nnz": 469248, "linear_dense_total": 4718592, "linear_dense_nnz": 1955328}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2502912, "linear_attention_total": 2359296, "linear_attention_nnz": 579840, "linear_dense_total": 4718592, "linear_dense_nnz": 1923072}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2376960, "linear_attention_total": 2359296, "linear_attention_nnz": 539904, "linear_dense_total": 4718592, "linear_dense_nnz": 1837056}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2201856, "linear_attention_total": 2359296, "linear_attention_nnz": 424704, "linear_dense_total": 4718592, "linear_dense_nnz": 1777152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1907712, "linear_attention_total": 2359296, "linear_attention_nnz": 439296, "linear_dense_total": 4718592, "linear_dense_nnz": 1468416}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1580544, "linear_attention_total": 2359296, "linear_attention_nnz": 428544, "linear_dense_total": 4718592, "linear_dense_nnz": 1152000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1095168, "linear_attention_total": 2359296, "linear_attention_nnz": 397824, "linear_dense_total": 4718592, "linear_dense_nnz": 697344}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 527616, "linear_attention_total": 2359296, "linear_attention_nnz": 235776, "linear_dense_total": 4718592, "linear_dense_nnz": 291840}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 428544, "linear_attention_total": 2359296, "linear_attention_nnz": 182784, "linear_dense_total": 4718592, "linear_dense_nnz": 245760}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 738816, "linear_attention_total": 2359296, "linear_attention_nnz": 112128, "linear_dense_total": 4718592, "linear_dense_nnz": 626688}}, "total_sparsity": 59.6933438975695, "linear_sparsity": 76.53175636574075}, "speed": {"eval_elapsed_time": 23.040934944001492}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49229570, "linear_total": 84934656, "linear_nnz": 25271040, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2214400, "linear_attention_total": 2359296, "linear_attention_nnz": 721408, "linear_dense_total": 4718592, "linear_dense_nnz": 1492992}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2390784, "linear_attention_total": 2359296, "linear_attention_nnz": 635136, "linear_dense_total": 4718592, "linear_dense_nnz": 1755648}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2850560, "linear_attention_total": 2359296, "linear_attention_nnz": 972032, "linear_dense_total": 4718592, "linear_dense_nnz": 1878528}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3188736, "linear_attention_total": 2359296, "linear_attention_nnz": 1256448, "linear_dense_total": 4718592, "linear_dense_nnz": 1932288}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3149824, "linear_attention_total": 2359296, "linear_attention_nnz": 1260544, "linear_dense_total": 4718592, "linear_dense_nnz": 1889280}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2906112, "linear_attention_total": 2359296, "linear_attention_nnz": 1121280, "linear_dense_total": 4718592, "linear_dense_nnz": 1784832}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2455040, "linear_attention_total": 2359296, "linear_attention_nnz": 1061888, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2015744, "linear_attention_total": 2359296, "linear_attention_nnz": 988160, "linear_dense_total": 4718592, "linear_dense_nnz": 1027584}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1550080, "linear_attention_total": 2359296, "linear_attention_nnz": 903424, "linear_dense_total": 4718592, "linear_dense_nnz": 646656}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 886784, "linear_attention_total": 2359296, "linear_attention_nnz": 636416, "linear_dense_total": 4718592, "linear_dense_nnz": 250368}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 682752, "linear_attention_total": 2359296, "linear_attention_nnz": 484608, "linear_dense_total": 4718592, "linear_dense_nnz": 198144}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 980224, "linear_attention_total": 2359296, "linear_attention_nnz": 313600, "linear_dense_total": 4718592, "linear_dense_nnz": 666624}}, "total_sparsity": 54.79095450471988, "linear_sparsity": 70.2464916087963}, "speed": {"eval_elapsed_time": 20.163633761985693}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-60000": {"stats": {"total": 108893186, "nnz": 53943554, "linear_total": 84934656, "linear_nnz": 29985024, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3576576, "linear_attention_total": 2359296, "linear_attention_nnz": 840960, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3070464, "linear_attention_total": 2359296, "linear_attention_nnz": 288768, "linear_dense_total": 4718592, "linear_dense_nnz": 2781696}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3222528, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 3024384}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3771648, "linear_attention_total": 2359296, "linear_attention_nnz": 770304, "linear_dense_total": 4718592, "linear_dense_nnz": 3001344}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3129600, "linear_attention_total": 2359296, "linear_attention_nnz": 393984, "linear_dense_total": 4718592, "linear_dense_nnz": 2735616}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2668032}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2801664, "linear_attention_total": 2359296, "linear_attention_nnz": 548352, "linear_dense_total": 4718592, "linear_dense_nnz": 2253312}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2118144, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 1920000}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1646592, "linear_attention_total": 2359296, "linear_attention_nnz": 284160, "linear_dense_total": 4718592, "linear_dense_nnz": 1362432}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 861696, "linear_attention_total": 2359296, "linear_attention_nnz": 202752, "linear_dense_total": 4718592, "linear_dense_nnz": 658944}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1195776, "linear_attention_total": 2359296, "linear_attention_nnz": 288000, "linear_dense_total": 4718592, "linear_dense_nnz": 907776}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1725696, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1410048}}, "total_sparsity": 50.461956361530284, "linear_sparsity": 64.69636140046296}, "speed": {"eval_elapsed_time": 14.269501545990352}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 49808642, "linear_total": 84934656, "linear_nnz": 25850112, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2888448, "linear_attention_total": 2359296, "linear_attention_nnz": 652032, "linear_dense_total": 4718592, "linear_dense_nnz": 2236416}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2680320, "linear_attention_total": 2359296, "linear_attention_nnz": 293376, "linear_dense_total": 4718592, "linear_dense_nnz": 2386944}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2864640, "linear_attention_total": 2359296, "linear_attention_nnz": 198144, "linear_dense_total": 4718592, "linear_dense_nnz": 2666496}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 3171072, "linear_attention_total": 2359296, "linear_attention_nnz": 530688, "linear_dense_total": 4718592, "linear_dense_nnz": 2640384}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2757888, "linear_attention_total": 2359296, "linear_attention_nnz": 392448, "linear_dense_total": 4718592, "linear_dense_nnz": 2365440}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2569728, "linear_attention_total": 2359296, "linear_attention_nnz": 196608, "linear_dense_total": 4718592, "linear_dense_nnz": 2373120}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2244096, "linear_attention_total": 2359296, "linear_attention_nnz": 310272, "linear_dense_total": 4718592, "linear_dense_nnz": 1933824}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1843968, "linear_attention_total": 2359296, "linear_attention_nnz": 197376, "linear_dense_total": 4718592, "linear_dense_nnz": 1646592}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1384704, "linear_attention_total": 2359296, "linear_attention_nnz": 200448, "linear_dense_total": 4718592, "linear_dense_nnz": 1184256}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 760320, "linear_attention_total": 2359296, "linear_attention_nnz": 204288, "linear_dense_total": 4718592, "linear_dense_nnz": 556032}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1115904, "linear_attention_total": 2359296, "linear_attention_nnz": 286464, "linear_dense_total": 4718592, "linear_dense_nnz": 829440}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1569024, "linear_attention_total": 2359296, "linear_attention_nnz": 315648, "linear_dense_total": 4718592, "linear_dense_nnz": 1253376}}, "total_sparsity": 54.25917467416189, "linear_sparsity": 69.56470630787037}, "speed": {"eval_elapsed_time": 12.333724958996754}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": {"stats": {"total": 108893186, "nnz": 42173698, "linear_total": 84934656, "linear_nnz": 18215168, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1516544, "linear_attention_total": 2359296, "linear_attention_nnz": 542720, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1758464, "linear_attention_total": 2359296, "linear_attention_nnz": 564992, "linear_dense_total": 4718592, "linear_dense_nnz": 1193472}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2030080, "linear_attention_total": 2359296, "linear_attention_nnz": 646144, "linear_dense_total": 4718592, "linear_dense_nnz": 1383936}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2328832, "linear_attention_total": 2359296, "linear_attention_nnz": 969472, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2283264, "linear_attention_total": 2359296, "linear_attention_nnz": 857856, "linear_dense_total": 4718592, "linear_dense_nnz": 1425408}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2099200, "linear_attention_total": 2359296, "linear_attention_nnz": 702976, "linear_dense_total": 4718592, "linear_dense_nnz": 1396224}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1846784, "linear_attention_total": 2359296, "linear_attention_nnz": 774656, "linear_dense_total": 4718592, "linear_dense_nnz": 1072128}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1589760, "linear_attention_total": 2359296, "linear_attention_nnz": 806400, "linear_dense_total": 4718592, "linear_dense_nnz": 783360}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 967424, "linear_attention_total": 2359296, "linear_attention_nnz": 520448, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 617216, "linear_attention_total": 2359296, "linear_attention_nnz": 435968, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 521984, "linear_attention_total": 2359296, "linear_attention_nnz": 354560, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 655616, "linear_attention_total": 2359296, "linear_attention_nnz": 231680, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.27058124647028, "linear_sparsity": 78.55390383873457}, "speed": {"eval_elapsed_time": 16.621274546021596}}, "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": {"stats": {"total": 108893186, "nnz": 42038274, "linear_total": 84934656, "linear_nnz": 18079744, "layers": {"null": {"total": 2, "nnz": 2}, "0": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1493248, "linear_attention_total": 2359296, "linear_attention_nnz": 519424, "linear_dense_total": 4718592, "linear_dense_nnz": 973824}, "1": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1757440, "linear_attention_total": 2359296, "linear_attention_nnz": 565504, "linear_dense_total": 4718592, "linear_dense_nnz": 1191936}, "2": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2028800, "linear_attention_total": 2359296, "linear_attention_nnz": 646400, "linear_dense_total": 4718592, "linear_dense_nnz": 1382400}, "3": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2297088, "linear_attention_total": 2359296, "linear_attention_nnz": 937728, "linear_dense_total": 4718592, "linear_dense_nnz": 1359360}, "4": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2270464, "linear_attention_total": 2359296, "linear_attention_nnz": 846592, "linear_dense_total": 4718592, "linear_dense_nnz": 1423872}, "5": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 2081792, "linear_attention_total": 2359296, "linear_attention_nnz": 688640, "linear_dense_total": 4718592, "linear_dense_nnz": 1393152}, "6": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1815296, "linear_attention_total": 2359296, "linear_attention_nnz": 744704, "linear_dense_total": 4718592, "linear_dense_nnz": 1070592}, "7": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 1613312, "linear_attention_total": 2359296, "linear_attention_nnz": 831488, "linear_dense_total": 4718592, "linear_dense_nnz": 781824}, "8": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 969472, "linear_attention_total": 2359296, "linear_attention_nnz": 522496, "linear_dense_total": 4718592, "linear_dense_nnz": 446976}, "9": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 594944, "linear_attention_total": 2359296, "linear_attention_nnz": 413696, "linear_dense_total": 4718592, "linear_dense_nnz": 181248}, "10": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 513792, "linear_attention_total": 2359296, "linear_attention_nnz": 346368, "linear_dense_total": 4718592, "linear_dense_nnz": 167424}, "11": {"total": 768, "nnz": 768, "linear_total": 7077888, "linear_nnz": 644096, "linear_attention_total": 2359296, "linear_attention_nnz": 220160, "linear_dense_total": 4718592, "linear_dense_nnz": 423936}}, "total_sparsity": 61.39494531824976, "linear_sparsity": 78.7133487654321}, "speed": {"eval_elapsed_time": 16.59519276500214}}}, "base_speed_report": {"eval_elapsed_time": 39.27680186094949, "optimize_mode": "disabled"}} \ No newline at end of file diff --git a/analysis/files/test.json b/analysis/files/test.json deleted file mode 100644 index faf50a8c..00000000 --- a/analysis/files/test.json +++ /dev/null @@ -1,40061 +0,0 @@ -{ - "base_speed_report": { - "cuda_eval_elapsed_time": 38.594393005371096, - "eval_elapsed_time": 45.63197132572532 - }, - "checkpoints": { - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.15799432355723, - "f1": 86.94169166073364 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 22.747020225524903, - "eval_elapsed_time": 29.958857133984566 - }, - "speedup": 1.6966790648941144, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 427776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 2045184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 394752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1708032, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 182784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 245760, - "linear_dense_total": 4718592, - "linear_nnz": 428544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 626688, - "linear_dense_total": 4718592, - "linear_nnz": 738816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1955328, - "linear_dense_total": 4718592, - "linear_nnz": 2424576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 579840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1923072, - "linear_dense_total": 4718592, - "linear_nnz": 2502912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 539904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1837056, - "linear_dense_total": 4718592, - "linear_nnz": 2376960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 424704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1777152, - "linear_dense_total": 4718592, - "linear_nnz": 2201856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1468416, - "linear_dense_total": 4718592, - "linear_nnz": 1907712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 428544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1152000, - "linear_dense_total": 4718592, - "linear_nnz": 1580544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 397824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1095168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 235776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 291840, - "linear_dense_total": 4718592, - "linear_nnz": 527616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19932672, - "linear_sparsity": 76.53175636574075, - "linear_total": 84934656, - "nnz": 43891202, - "total": 108893186, - "total_sparsity": 59.6933438975695 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.46073793755913, - "f1": 87.08591835424342 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 16.44181579208374, - "eval_elapsed_time": 23.62707085069269 - }, - "speedup": 2.3473315534865185, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1516544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 564992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1193472, - "linear_dense_total": 4718592, - "linear_nnz": 1758464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 354560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167424, - "linear_dense_total": 4718592, - "linear_nnz": 521984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 231680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423936, - "linear_dense_total": 4718592, - "linear_nnz": 655616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 646144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1383936, - "linear_dense_total": 4718592, - "linear_nnz": 2030080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 969472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1359360, - "linear_dense_total": 4718592, - "linear_nnz": 2328832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 857856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1425408, - "linear_dense_total": 4718592, - "linear_nnz": 2283264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 702976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1396224, - "linear_dense_total": 4718592, - "linear_nnz": 2099200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 774656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1072128, - "linear_dense_total": 4718592, - "linear_nnz": 1846784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 806400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 783360, - "linear_dense_total": 4718592, - "linear_nnz": 1589760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 520448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 967424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 435968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 617216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18215168, - "linear_sparsity": 78.55390383873457, - "linear_total": 84934656, - "nnz": 42173698, - "total": 108893186, - "total_sparsity": 61.27058124647028 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.2620624408704, - "f1": 86.97825692623259 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 16.405798454284668, - "eval_elapsed_time": 23.622337056789547 - }, - "speedup": 2.3524848920286154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1493248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 565504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1191936, - "linear_dense_total": 4718592, - "linear_nnz": 1757440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 346368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167424, - "linear_dense_total": 4718592, - "linear_nnz": 513792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 220160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423936, - "linear_dense_total": 4718592, - "linear_nnz": 644096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 646400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1382400, - "linear_dense_total": 4718592, - "linear_nnz": 2028800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 937728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1359360, - "linear_dense_total": 4718592, - "linear_nnz": 2297088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 846592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1423872, - "linear_dense_total": 4718592, - "linear_nnz": 2270464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 688640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2081792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 744704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1070592, - "linear_dense_total": 4718592, - "linear_nnz": 1815296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 831488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 781824, - "linear_dense_total": 4718592, - "linear_nnz": 1613312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 522496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 969472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 594944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18079744, - "linear_sparsity": 78.7133487654321, - "linear_total": 84934656, - "nnz": 42038274, - "total": 108893186, - "total_sparsity": 61.39494531824976 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.07723643002453 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 19.890604362487792, - "eval_elapsed_time": 27.08285549096763 - }, - "speedup": 1.9403328477116193, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 721408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2214400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 635136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1755648, - "linear_dense_total": 4718592, - "linear_nnz": 2390784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 484608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 682752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 313600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 666624, - "linear_dense_total": 4718592, - "linear_nnz": 980224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 972032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1878528, - "linear_dense_total": 4718592, - "linear_nnz": 2850560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1256448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1932288, - "linear_dense_total": 4718592, - "linear_nnz": 3188736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1260544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1889280, - "linear_dense_total": 4718592, - "linear_nnz": 3149824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1784832, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2455040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1027584, - "linear_dense_total": 4718592, - "linear_nnz": 2015744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 903424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1550080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 250368, - "linear_dense_total": 4718592, - "linear_nnz": 886784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25271040, - "linear_sparsity": 70.2464916087963, - "linear_total": 84934656, - "nnz": 49229570, - "total": 108893186, - "total_sparsity": 54.79095450471988 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold_apme-sigmoied_threshold_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.84862819299906, - "f1": 87.52317853046331 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.480328201293947, - "eval_elapsed_time": 31.782106802333146 - }, - "speedup": 1.5765472050873537, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2607104, - "linear_dense_total": 4718592, - "linear_nnz": 3221504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2899968, - "linear_dense_total": 4718592, - "linear_nnz": 3504128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 401408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 451584, - "linear_dense_total": 4718592, - "linear_nnz": 852992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 244736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 501760, - "linear_dense_total": 4718592, - "linear_nnz": 746496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 730112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3708928, - "linear_dense_total": 4718592, - "linear_nnz": 4439040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1044480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3815424, - "linear_dense_total": 4718592, - "linear_nnz": 4859904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1012736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3722240, - "linear_dense_total": 4718592, - "linear_nnz": 4734976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 882688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3777536, - "linear_dense_total": 4718592, - "linear_nnz": 4660224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 980992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3258368, - "linear_dense_total": 4718592, - "linear_nnz": 4239360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 903168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2234368, - "linear_dense_total": 4718592, - "linear_nnz": 3137536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 710656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1124352, - "linear_dense_total": 4718592, - "linear_nnz": 1835008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 552960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 324608, - "linear_dense_total": 4718592, - "linear_nnz": 877568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 37108736, - "linear_sparsity": 56.309076003086425, - "linear_total": 84934656, - "nnz": 61067266, - "total": 108893186, - "total_sparsity": 43.920030037508496 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "eval_metrics": { - "exact_match": 79.99053926206244, - "f1": 87.56439208763325 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 2, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 19.37784966278076, - "eval_elapsed_time": 26.613120706751943 - }, - "speedup": 1.9916757368336773, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 684800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2007552, - "linear_dense_total": 4718592, - "linear_nnz": 2692352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 646656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2019840, - "linear_dense_total": 4718592, - "linear_nnz": 2666496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 274944, - "linear_dense_total": 4718592, - "linear_nnz": 707072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 277760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 794112, - "linear_dense_total": 4718592, - "linear_nnz": 1071872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 691712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2239488, - "linear_dense_total": 4718592, - "linear_nnz": 2931200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1149184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2211840, - "linear_dense_total": 4718592, - "linear_nnz": 3361024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1007872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2158080, - "linear_dense_total": 4718592, - "linear_nnz": 3165952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 997376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2073600, - "linear_dense_total": 4718592, - "linear_nnz": 3070976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 911872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1732608, - "linear_dense_total": 4718592, - "linear_nnz": 2644480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 944640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304064, - "linear_dense_total": 4718592, - "linear_nnz": 2248704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 763136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 751104, - "linear_dense_total": 4718592, - "linear_nnz": 1514240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 526080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 839424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26913792, - "linear_sparsity": 68.31235532407408, - "linear_total": 84934656, - "nnz": 50872322, - "total": 108893186, - "total_sparsity": 53.282364242699266 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 12, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "eval_metrics": { - "exact_match": 79.7918637653737, - "f1": 87.14951283583915 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 2, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 15 - }, - "speed": { - "cuda_eval_elapsed_time": 17.153405197143556, - "eval_elapsed_time": 24.343701715115458 - }, - "speedup": 2.249955187428206, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 551680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1539072, - "linear_dense_total": 4718592, - "linear_nnz": 2090752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1681920, - "linear_dense_total": 4718592, - "linear_nnz": 2278656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 361728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 236544, - "linear_dense_total": 4718592, - "linear_nnz": 598272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 238336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 620544, - "linear_dense_total": 4718592, - "linear_nnz": 858880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 567808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1850880, - "linear_dense_total": 4718592, - "linear_nnz": 2418688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1002752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1841664, - "linear_dense_total": 4718592, - "linear_nnz": 2844416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 878592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1812480, - "linear_dense_total": 4718592, - "linear_nnz": 2691072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 721152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1754112, - "linear_dense_total": 4718592, - "linear_nnz": 2475264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 805376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1423872, - "linear_dense_total": 4718592, - "linear_nnz": 2229248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 892672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 1966336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 460800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 620544, - "linear_dense_total": 4718592, - "linear_nnz": 1081344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 454144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 247296, - "linear_dense_total": 4718592, - "linear_nnz": 701440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22234368, - "linear_sparsity": 73.82179542824075, - "linear_total": 84934656, - "nnz": 46192898, - "total": 108893186, - "total_sparsity": 57.57962486284496 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 12, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.94985808893094, - "f1": 86.83052028636654 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 64, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.454076248168946, - "eval_elapsed_time": 22.627552575897425 - }, - "speedup": 2.4973600741709747, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 543488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 1560320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 593664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1222656, - "linear_dense_total": 4718592, - "linear_nnz": 1816320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 409088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 178176, - "linear_dense_total": 4718592, - "linear_nnz": 587264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 250880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 697856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 603904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1420800, - "linear_dense_total": 4718592, - "linear_nnz": 2024704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 870656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1459200, - "linear_dense_total": 4718592, - "linear_nnz": 2329856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 887552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1445376, - "linear_dense_total": 4718592, - "linear_nnz": 2332928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 720640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1370112, - "linear_dense_total": 4718592, - "linear_nnz": 2090752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 806400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081344, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 926464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 1742080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 455936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 488448, - "linear_dense_total": 4718592, - "linear_nnz": 944384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 505600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 705280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18719488, - "linear_sparsity": 77.96012972608024, - "linear_total": 84934656, - "nnz": 42678018, - "total": 108893186, - "total_sparsity": 60.80744850279245 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": { - "eval_metrics": { - "exact_match": 79.15799432355723, - "f1": 87.0225802715423 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 64, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.710145233154297, - "eval_elapsed_time": 22.9045692961663 - }, - "speedup": 2.456654119525417, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 579328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081344, - "linear_dense_total": 4718592, - "linear_nnz": 1660672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 632576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1267200, - "linear_dense_total": 4718592, - "linear_nnz": 1899776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 448256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 631040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 288256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 462336, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 584192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446912, - "linear_dense_total": 4718592, - "linear_nnz": 2031104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1049600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1494528, - "linear_dense_total": 4718592, - "linear_nnz": 2544128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 916736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1479168, - "linear_dense_total": 4718592, - "linear_nnz": 2395904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 790272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394688, - "linear_dense_total": 4718592, - "linear_nnz": 2184960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 798720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1113600, - "linear_dense_total": 4718592, - "linear_nnz": 1912320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 969216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 1806336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 471808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 497664, - "linear_dense_total": 4718592, - "linear_nnz": 969472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 505344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 211968, - "linear_dense_total": 4718592, - "linear_nnz": 717312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19503616, - "linear_sparsity": 77.03691647376543, - "linear_total": 84934656, - "nnz": 43462146, - "total": 108893186, - "total_sparsity": 60.08735936884057 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "eval_metrics": { - "exact_match": 78.97824030274361, - "f1": 86.76378852886562 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.205588718414308, - "eval_elapsed_time": 24.35187277989462 - }, - "speedup": 2.243131207946717, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 476160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 929280, - "linear_dense_total": 4718592, - "linear_nnz": 1405440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 589568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1142784, - "linear_dense_total": 4718592, - "linear_nnz": 1732352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 378624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 523008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 208384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 431616, - "linear_dense_total": 4718592, - "linear_nnz": 640000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 628992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1350144, - "linear_dense_total": 4718592, - "linear_nnz": 1979136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 913152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1305600, - "linear_dense_total": 4718592, - "linear_nnz": 2218752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 850688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1406976, - "linear_dense_total": 4718592, - "linear_nnz": 2257664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 764672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1331712, - "linear_dense_total": 4718592, - "linear_nnz": 2096384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 763136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1022976, - "linear_dense_total": 4718592, - "linear_nnz": 1786112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 781568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 757248, - "linear_dense_total": 4718592, - "linear_nnz": 1538816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 596224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 431616, - "linear_dense_total": 4718592, - "linear_nnz": 1027840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 394752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 176640, - "linear_dense_total": 4718592, - "linear_nnz": 571392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17776896, - "linear_sparsity": 79.0699146412037, - "linear_total": 84934656, - "nnz": 41735426, - "total": 108893186, - "total_sparsity": 61.67306005721974 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 79.63103122043519, - "f1": 87.27956184118273 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.41685264968872, - "eval_elapsed_time": 24.593657957855612 - }, - "speedup": 2.215922347259501, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 472576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 964608, - "linear_dense_total": 4718592, - "linear_nnz": 1437184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1150464, - "linear_dense_total": 4718592, - "linear_nnz": 1754624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 395008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 540928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 658432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 634624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380864, - "linear_dense_total": 4718592, - "linear_nnz": 2015488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 951040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1325568, - "linear_dense_total": 4718592, - "linear_nnz": 2276608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 861184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1419264, - "linear_dense_total": 4718592, - "linear_nnz": 2280448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 779008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1344000, - "linear_dense_total": 4718592, - "linear_nnz": 2123008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 799744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1041408, - "linear_dense_total": 4718592, - "linear_nnz": 1841152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 790272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 763392, - "linear_dense_total": 4718592, - "linear_nnz": 1553664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 610816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 431616, - "linear_dense_total": 4718592, - "linear_nnz": 1042432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 405248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 179712, - "linear_dense_total": 4718592, - "linear_nnz": 584960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18108928, - "linear_sparsity": 78.6789882330247, - "linear_total": 84934656, - "nnz": 42067458, - "total": 108893186, - "total_sparsity": 61.3681447432349 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.58426699451658 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 15 - }, - "speed": { - "cuda_eval_elapsed_time": 15.051653835296632, - "eval_elapsed_time": 22.226274209097028 - }, - "speedup": 2.56412972472606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 749568, - "linear_dense_total": 4718592, - "linear_nnz": 1209344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 488192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006080, - "linear_dense_total": 4718592, - "linear_nnz": 1494272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 311040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 460032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 550144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1085952, - "linear_dense_total": 4718592, - "linear_nnz": 1636096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1969664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1198080, - "linear_dense_total": 4718592, - "linear_nnz": 1746944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1128960, - "linear_dense_total": 4718592, - "linear_nnz": 1782272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 593920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867840, - "linear_dense_total": 4718592, - "linear_nnz": 1461760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669696, - "linear_dense_total": 4718592, - "linear_nnz": 1391616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 387072, - "linear_dense_total": 4718592, - "linear_nnz": 754688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 373760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 531968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14957824, - "linear_sparsity": 82.38902150848766, - "linear_total": 84934656, - "nnz": 38916354, - "total": 108893186, - "total_sparsity": 64.26190156654981 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-130000": { - "eval_metrics": { - "exact_match": 78.9593188268685, - "f1": 86.73375735833712 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 15, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.435867366790772, - "eval_elapsed_time": 22.581966675817966 - }, - "speedup": 2.500306078581909, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 488448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 562176, - "linear_dense_total": 4718592, - "linear_nnz": 1050624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 512512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 870912, - "linear_dense_total": 4718592, - "linear_nnz": 1383424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 367360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 116736, - "linear_dense_total": 4718592, - "linear_nnz": 484096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 225024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 271872, - "linear_dense_total": 4718592, - "linear_nnz": 496896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 628224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 999936, - "linear_dense_total": 4718592, - "linear_nnz": 1628160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 937216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1061376, - "linear_dense_total": 4718592, - "linear_nnz": 1998592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 821760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1118208, - "linear_dense_total": 4718592, - "linear_nnz": 1939968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 648448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1061376, - "linear_dense_total": 4718592, - "linear_nnz": 1709824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 641536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 763392, - "linear_dense_total": 4718592, - "linear_nnz": 1404928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1360896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 467712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 350208, - "linear_dense_total": 4718592, - "linear_nnz": 817920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 403200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 141312, - "linear_dense_total": 4718592, - "linear_nnz": 544512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14819840, - "linear_sparsity": 82.5514805169753, - "linear_total": 84934656, - "nnz": 38778370, - "total": 108893186, - "total_sparsity": 64.38861656596218 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 30, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": { - "eval_metrics": { - "exact_match": 78.96877956480606, - "f1": 86.71968503618079 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 15, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.359982524871826, - "eval_elapsed_time": 22.516427854076028 - }, - "speedup": 2.512658653281453, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1010688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 518912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 852480, - "linear_dense_total": 4718592, - "linear_nnz": 1371392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 455936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 212992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 608768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 981504, - "linear_dense_total": 4718592, - "linear_nnz": 1590272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 869888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1895936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 775936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1093632, - "linear_dense_total": 4718592, - "linear_nnz": 1869568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 618752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044480, - "linear_dense_total": 4718592, - "linear_nnz": 1663232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 754176, - "linear_dense_total": 4718592, - "linear_nnz": 1383424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 707584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 1295872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 463104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 808704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 376064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 515840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14334976, - "linear_sparsity": 83.1223476080247, - "linear_total": 84934656, - "nnz": 38293506, - "total": 108893186, - "total_sparsity": 64.83388225963009 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 30, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "eval_metrics": { - "exact_match": 78.74172185430463, - "f1": 86.69521763053608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.306304389953613, - "eval_elapsed_time": 24.480814102105796 - }, - "speedup": 2.230077094204775, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 606208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1185792, - "linear_dense_total": 4718592, - "linear_nnz": 1792000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 378112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 525568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419328, - "linear_dense_total": 4718592, - "linear_nnz": 626688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 625664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1348608, - "linear_dense_total": 4718592, - "linear_nnz": 1974272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 910592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1320960, - "linear_dense_total": 4718592, - "linear_nnz": 2231552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 828672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380864, - "linear_dense_total": 4718592, - "linear_nnz": 2209536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 765440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 2046464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1003008, - "linear_dense_total": 4718592, - "linear_nnz": 1764096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 792832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 780288, - "linear_dense_total": 4718592, - "linear_nnz": 1573120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 986880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 389888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 572672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17711872, - "linear_sparsity": 79.14647231867285, - "linear_total": 84934656, - "nnz": 41670402, - "total": 108893186, - "total_sparsity": 61.73277361909495 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.84578997161779, - "f1": 86.78133258210022 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.32754041290283, - "eval_elapsed_time": 24.51584801170975 - }, - "speedup": 2.2273439903006693, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 465664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 938496, - "linear_dense_total": 4718592, - "linear_nnz": 1404160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 584192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1182720, - "linear_dense_total": 4718592, - "linear_nnz": 1766912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 370432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 516352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 615680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 615680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1345536, - "linear_dense_total": 4718592, - "linear_nnz": 1961216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 895488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1314816, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 812032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1377792, - "linear_dense_total": 4718592, - "linear_nnz": 2189824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 755456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1282560, - "linear_dense_total": 4718592, - "linear_nnz": 2038016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 739840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 998400, - "linear_dense_total": 4718592, - "linear_nnz": 1738240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 797440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 774144, - "linear_dense_total": 4718592, - "linear_nnz": 1571584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430080, - "linear_dense_total": 4718592, - "linear_nnz": 943872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 563968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17520128, - "linear_sparsity": 79.37222704475309, - "linear_total": 84934656, - "nnz": 41478658, - "total": 108893186, - "total_sparsity": 61.90885809879785 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": { - "eval_metrics": { - "exact_match": 79.09176915799432, - "f1": 86.94165524295053 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.906531421661377, - "eval_elapsed_time": 25.082025394309312 - }, - "speedup": 2.155324897745623, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 494336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090560, - "linear_dense_total": 4718592, - "linear_nnz": 1584896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 631552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 1917184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 411392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 156672, - "linear_dense_total": 4718592, - "linear_nnz": 568064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 223232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 460800, - "linear_dense_total": 4718592, - "linear_nnz": 684032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 648192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1443840, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1047552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1419264, - "linear_dense_total": 4718592, - "linear_nnz": 2466816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 942592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1460736, - "linear_dense_total": 4718592, - "linear_nnz": 2403328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 837888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1373184, - "linear_dense_total": 4718592, - "linear_nnz": 2211072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 841472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1095168, - "linear_dense_total": 4718592, - "linear_nnz": 1936640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 833536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827904, - "linear_dense_total": 4718592, - "linear_nnz": 1661440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 621824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 462336, - "linear_dense_total": 4718592, - "linear_nnz": 1084160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 188928, - "linear_dense_total": 4718592, - "linear_nnz": 621056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19230720, - "linear_sparsity": 77.3582175925926, - "linear_total": 84934656, - "nnz": 43189250, - "total": 108893186, - "total_sparsity": 60.33796825450584 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 79.11069063386944, - "f1": 86.88040012719469 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.75180138397217, - "eval_elapsed_time": 24.917593302205205 - }, - "speedup": 2.174111357522138, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 970752, - "linear_dense_total": 4718592, - "linear_nnz": 1451008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 620288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1214976, - "linear_dense_total": 4718592, - "linear_nnz": 1835264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 392704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 150528, - "linear_dense_total": 4718592, - "linear_nnz": 543232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 214784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 649472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 625664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1374720, - "linear_dense_total": 4718592, - "linear_nnz": 2000384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 933376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1337856, - "linear_dense_total": 4718592, - "linear_nnz": 2271232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 862464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1405440, - "linear_dense_total": 4718592, - "linear_nnz": 2267904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 783616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1297920, - "linear_dense_total": 4718592, - "linear_nnz": 2081536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 773376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1033728, - "linear_dense_total": 4718592, - "linear_nnz": 1807104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 811008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791040, - "linear_dense_total": 4718592, - "linear_nnz": 1602048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 572160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 437760, - "linear_dense_total": 4718592, - "linear_nnz": 1009920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 405504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 592896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18112000, - "linear_sparsity": 78.67537133487654, - "linear_total": 84934656, - "nnz": 42070530, - "total": 108893186, - "total_sparsity": 61.365323630075444 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 76.71712393566698, - "f1": 85.09538457703842 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 14.06190474319458, - "eval_elapsed_time": 21.232633945997804 - }, - "speedup": 2.744606346736156, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 295680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 405504, - "linear_dense_total": 4718592, - "linear_nnz": 701184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 380672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 662016, - "linear_dense_total": 4718592, - "linear_nnz": 1042688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 202240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 315904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 129536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 178176, - "linear_dense_total": 4718592, - "linear_nnz": 307712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 328960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 758784, - "linear_dense_total": 4718592, - "linear_nnz": 1087744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 612608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 728064, - "linear_dense_total": 4718592, - "linear_nnz": 1340672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 331776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 811008, - "linear_dense_total": 4718592, - "linear_nnz": 1142784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 411136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 754176, - "linear_dense_total": 4718592, - "linear_nnz": 1165312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 319744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 908032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 457472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 499200, - "linear_dense_total": 4718592, - "linear_nnz": 956672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 246784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 310272, - "linear_dense_total": 4718592, - "linear_nnz": 557056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 252672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 107520, - "linear_dense_total": 4718592, - "linear_nnz": 360192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9885952, - "linear_sparsity": 88.36052035108025, - "linear_total": 84934656, - "nnz": 33844482, - "total": 108893186, - "total_sparsity": 68.91955939281638 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": { - "eval_metrics": { - "exact_match": 80.90823084200568, - "f1": 88.13888839423888 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 40.403957000732426, - "eval_elapsed_time": 47.70582241564989 - }, - "speedup": 0.9552131986644643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2151936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4713984, - "linear_dense_total": 4718592, - "linear_nnz": 6865920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2299648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4709376, - "linear_dense_total": 4718592, - "linear_nnz": 7009024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4638720, - "linear_dense_total": 4718592, - "linear_nnz": 6924288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2312448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4687872, - "linear_dense_total": 4718592, - "linear_nnz": 7000320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4707840, - "linear_dense_total": 4718592, - "linear_nnz": 7037952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4710912, - "linear_dense_total": 4718592, - "linear_nnz": 7041024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2324992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4704768, - "linear_dense_total": 4718592, - "linear_nnz": 7029760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2337280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4706304, - "linear_dense_total": 4718592, - "linear_nnz": 7043584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2321664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4684800, - "linear_dense_total": 4718592, - "linear_nnz": 7006464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2342400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4683264, - "linear_dense_total": 4718592, - "linear_nnz": 7025664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2296576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4654080, - "linear_dense_total": 4718592, - "linear_nnz": 6950656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2259200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6905600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 83840256, - "linear_sparsity": 1.288519965277779, - "linear_total": 84934656, - "nnz": 107798786, - "total": 108893186, - "total_sparsity": 1.005021563057218 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "eval_metrics": { - "exact_match": 78.21192052980132, - "f1": 86.2154189083501 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.830447120666506, - "eval_elapsed_time": 47.13309640903026 - }, - "speedup": 0.968967104196677, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1914624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4678656, - "linear_dense_total": 4718592, - "linear_nnz": 6593280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2103296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4669440, - "linear_dense_total": 4718592, - "linear_nnz": 6772736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2053632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4353024, - "linear_dense_total": 4718592, - "linear_nnz": 6406656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2100480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4538880, - "linear_dense_total": 4718592, - "linear_nnz": 6639360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2239232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6885632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2219520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6876672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2216448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6873600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2226176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4615680, - "linear_dense_total": 4718592, - "linear_nnz": 6841856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2190848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4595712, - "linear_dense_total": 4718592, - "linear_nnz": 6786560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2261760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4549632, - "linear_dense_total": 4718592, - "linear_nnz": 6811392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2178048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4431360, - "linear_dense_total": 4718592, - "linear_nnz": 6609408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2049792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4349952, - "linear_dense_total": 4718592, - "linear_nnz": 6399744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 80496896, - "linear_sparsity": 5.224910783179015, - "linear_total": 84934656, - "nnz": 104455426, - "total": 108893186, - "total_sparsity": 4.075333051601593 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": { - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.6699349353281 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.58176746368408, - "eval_elapsed_time": 46.91258597606793 - }, - "speedup": 0.975054816356574, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2354176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7072768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4715520, - "linear_dense_total": 4718592, - "linear_nnz": 7074816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4717056, - "linear_dense_total": 4718592, - "linear_nnz": 7076352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 84922880, - "linear_sparsity": 0.013864776234573384, - "linear_total": 84934656, - "nnz": 108881410, - "total": 108893186, - "total_sparsity": 0.010814267111258768 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.94701986754967, - "f1": 86.06827252573265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 14.216132064819336, - "eval_elapsed_time": 21.342612544074655 - }, - "speedup": 2.7148307872632, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1044480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 809472, - "linear_dense_total": 4718592, - "linear_nnz": 1177088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 276224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 411392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 178176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 251904, - "linear_dense_total": 4718592, - "linear_nnz": 430080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 492032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 958464, - "linear_dense_total": 4718592, - "linear_nnz": 1450496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 733696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 918528, - "linear_dense_total": 4718592, - "linear_nnz": 1652224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 461056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1050624, - "linear_dense_total": 4718592, - "linear_nnz": 1511680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 580096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 953856, - "linear_dense_total": 4718592, - "linear_nnz": 1533952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 764928, - "linear_dense_total": 4718592, - "linear_nnz": 1227520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 624384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 1195776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 351744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348672, - "linear_dense_total": 4718592, - "linear_nnz": 700416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 339968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 479744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12814848, - "linear_sparsity": 84.912109375, - "linear_total": 84934656, - "nnz": 36773378, - "total": 108893186, - "total_sparsity": 66.22986308803564 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.06717123935667, - "f1": 85.28341140334766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 13.584790561676026, - "eval_elapsed_time": 20.705443068873137 - }, - "speedup": 2.8410002222816386, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 384768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 502272, - "linear_dense_total": 4718592, - "linear_nnz": 887040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 355840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1057792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 115200, - "linear_dense_total": 4718592, - "linear_nnz": 371712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 150016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 221184, - "linear_dense_total": 4718592, - "linear_nnz": 371200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1285888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 672256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1497088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 418560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 932352, - "linear_dense_total": 4718592, - "linear_nnz": 1350912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1395712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 498944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655872, - "linear_dense_total": 4718592, - "linear_nnz": 1154816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 562176, - "linear_dense_total": 4718592, - "linear_nnz": 1059840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 297216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 609024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 316416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 119808, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11477248, - "linear_sparsity": 86.4869671103395, - "linear_total": 84934656, - "nnz": 35435778, - "total": 108893186, - "total_sparsity": 67.45822277621669 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.80132450331126, - "f1": 87.48291010744668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.270113506317138, - "eval_elapsed_time": 25.450434973929077 - }, - "speedup": 2.1124331270315624, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 627712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 1908736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1548288, - "linear_dense_total": 4718592, - "linear_nnz": 2145280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 634368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 268288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 827392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1709568, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1180672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1740288, - "linear_dense_total": 4718592, - "linear_nnz": 2920960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1204224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600512, - "linear_dense_total": 4718592, - "linear_nnz": 2516992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 909312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1242624, - "linear_dense_total": 4718592, - "linear_nnz": 2151936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 917504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 972288, - "linear_dense_total": 4718592, - "linear_nnz": 1889792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 856064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1398272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 611328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 247296, - "linear_dense_total": 4718592, - "linear_nnz": 858624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22657536, - "linear_sparsity": 73.32356770833333, - "linear_total": 84934656, - "nnz": 46572775, - "total": 108893186, - "total_sparsity": 57.23077199706509 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 80.27436140018922, - "f1": 87.70461789964966 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.375184078216552, - "eval_elapsed_time": 25.600778602063656 - }, - "speedup": 2.1003540884863323, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 1984512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 592896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2164224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 667648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 869376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 880640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 2625536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1230848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 2992640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1214464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 2940928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 906240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2535936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2213376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 935936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 1923584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 872448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 1419264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 883712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23220736, - "linear_sparsity": 72.66046971450618, - "linear_total": 84934656, - "nnz": 47136529, - "total": 108893186, - "total_sparsity": 56.713059162397904 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.20529801324503, - "f1": 87.11181141207972 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.401466148376464, - "eval_elapsed_time": 24.569451212882996 - }, - "speedup": 2.2178816817094407, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 838656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287232, - "linear_dense_total": 4718592, - "linear_nnz": 1125888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 692224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1188352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 573952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 293888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 155136, - "linear_dense_total": 4718592, - "linear_nnz": 449024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1089536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1694720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1291264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 671232, - "linear_dense_total": 4718592, - "linear_nnz": 1962496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1384448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 728064, - "linear_dense_total": 4718592, - "linear_nnz": 2112512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 662016, - "linear_dense_total": 4718592, - "linear_nnz": 1783296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1127424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 505344, - "linear_dense_total": 4718592, - "linear_nnz": 1632768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 942080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 391680, - "linear_dense_total": 4718592, - "linear_nnz": 1333760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 982016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 222720, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 729600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15791104, - "linear_sparsity": 81.40793788580247, - "linear_total": 84934656, - "nnz": 39702836, - "total": 108893186, - "total_sparsity": 63.53965068117302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.80794701986756, - "f1": 86.74156854566804 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 15.618790004730226, - "eval_elapsed_time": 22.811819266993552 - }, - "speedup": 2.471023235070233, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 518144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1344512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090560, - "linear_dense_total": 4718592, - "linear_nnz": 1606656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 324608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 209920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 555520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1204224, - "linear_dense_total": 4718592, - "linear_nnz": 1842176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1184256, - "linear_dense_total": 4718592, - "linear_nnz": 2097664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 2056192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 664576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1201152, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 935424, - "linear_dense_total": 4718592, - "linear_nnz": 1565184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1486336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 415744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 428544, - "linear_dense_total": 4718592, - "linear_nnz": 844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 592896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16329216, - "linear_sparsity": 80.7743778935185, - "linear_total": 84934656, - "nnz": 40239113, - "total": 108893186, - "total_sparsity": 63.04717083032174 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 78.85525070955535, - "f1": 86.76897969849135 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 15.796802043914795, - "eval_elapsed_time": 22.965831307694316 - }, - "speedup": 2.4431776063331965, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1394688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 539648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1640960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 354304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 226304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 591872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 657408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 1878528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 931840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2143744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 864256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2143744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 686080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 1902592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 649216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 1601536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 791552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 1507328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 474112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 908800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 435200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16822784, - "linear_sparsity": 80.19326292438271, - "linear_total": 84934656, - "nnz": 40733175, - "total": 108893186, - "total_sparsity": 62.593458327135366 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.18070009460737, - "f1": 85.6109462422114 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 13.485522186279297, - "eval_elapsed_time": 20.651509277056903 - }, - "speedup": 2.86191312967017, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 907264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1074176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 377344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 146432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 361472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 402432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 681984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1508352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 405504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1328640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1422848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 449536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1094656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 577536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1102848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 628224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 320512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 434176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11493376, - "linear_sparsity": 86.46797839506173, - "linear_total": 84934656, - "nnz": 35398714, - "total": 108893186, - "total_sparsity": 67.49225980035152 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.9120151371807, - "f1": 86.62567124382491 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.135342720031739, - "eval_elapsed_time": 20.267827302217484 - }, - "speedup": 2.9382098227641706, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1376256, - "linear_dense_total": 4718592, - "linear_nnz": 2850816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1537536, - "linear_dense_total": 4718592, - "linear_nnz": 2323968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 479232, - "linear_dense_total": 4718592, - "linear_nnz": 970752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 786432, - "linear_dense_total": 4718592, - "linear_nnz": 1081344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1843200, - "linear_dense_total": 4718592, - "linear_nnz": 3022848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1672704, - "linear_dense_total": 4718592, - "linear_nnz": 2557440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1620480, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1675776, - "linear_dense_total": 4718592, - "linear_nnz": 2068992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1299456, - "linear_dense_total": 4718592, - "linear_nnz": 1790976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1069056, - "linear_dense_total": 4718592, - "linear_nnz": 1462272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 737280, - "linear_dense_total": 4718592, - "linear_nnz": 1523712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 322560, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22382592, - "linear_sparsity": 73.6472800925926, - "linear_total": 84934656, - "nnz": 46293486, - "total": 108893186, - "total_sparsity": 57.487251773494805 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 79.27152317880795, - "f1": 86.82791223756466 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.55481234741211, - "eval_elapsed_time": 20.70654077688232 - }, - "speedup": 2.847283460382213, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1430016, - "linear_dense_total": 4718592, - "linear_nnz": 2904576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1582080, - "linear_dense_total": 4718592, - "linear_nnz": 2368512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 499200, - "linear_dense_total": 4718592, - "linear_nnz": 990720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 812544, - "linear_dense_total": 4718592, - "linear_nnz": 1107456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1904640, - "linear_dense_total": 4718592, - "linear_nnz": 3084288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1715712, - "linear_dense_total": 4718592, - "linear_nnz": 2600448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1654272, - "linear_dense_total": 4718592, - "linear_nnz": 2244096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1703424, - "linear_dense_total": 4718592, - "linear_nnz": 2096640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1320960, - "linear_dense_total": 4718592, - "linear_nnz": 1910784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1082880, - "linear_dense_total": 4718592, - "linear_nnz": 1476096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 748032, - "linear_dense_total": 4718592, - "linear_nnz": 1534464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 327168, - "linear_dense_total": 4718592, - "linear_nnz": 523776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22841856, - "linear_sparsity": 73.10655381944444, - "linear_total": 84934656, - "nnz": 46753113, - "total": 108893186, - "total_sparsity": 57.06516200196401 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw10_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "eval_metrics": { - "exact_match": 78.9593188268685, - "f1": 86.6751351982691 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.45258574295044, - "eval_elapsed_time": 20.617251713294536 - }, - "speedup": 2.868920053202093, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1408512, - "linear_dense_total": 4718592, - "linear_nnz": 2883072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 2342400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 491520, - "linear_dense_total": 4718592, - "linear_nnz": 983040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 798720, - "linear_dense_total": 4718592, - "linear_nnz": 1093632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1875456, - "linear_dense_total": 4718592, - "linear_nnz": 3055104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1700352, - "linear_dense_total": 4718592, - "linear_nnz": 2585088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635840, - "linear_dense_total": 4718592, - "linear_nnz": 2225664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689600, - "linear_dense_total": 4718592, - "linear_nnz": 2082816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1311744, - "linear_dense_total": 4718592, - "linear_nnz": 1901568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1075200, - "linear_dense_total": 4718592, - "linear_nnz": 1468416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 741888, - "linear_dense_total": 4718592, - "linear_nnz": 1528320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 324096, - "linear_dense_total": 4718592, - "linear_nnz": 520704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22669824, - "linear_sparsity": 73.30910011574075, - "linear_total": 84934656, - "nnz": 46580969, - "total": 108893186, - "total_sparsity": 57.22324719197764 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 78.99716177861873, - "f1": 86.83592847349966 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.0001, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 5, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.721765300750732, - "eval_elapsed_time": 20.8758007818833 - }, - "speedup": 2.812640513772638, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1499136, - "linear_dense_total": 4718592, - "linear_nnz": 2973696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1660416, - "linear_dense_total": 4718592, - "linear_nnz": 2446848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 513024, - "linear_dense_total": 4718592, - "linear_nnz": 1004544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 898560, - "linear_dense_total": 4718592, - "linear_nnz": 1193472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 983040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1995264, - "linear_dense_total": 4718592, - "linear_nnz": 2978304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1277952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1855488, - "linear_dense_total": 4718592, - "linear_nnz": 3133440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1724928, - "linear_dense_total": 4718592, - "linear_nnz": 2216448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1798656, - "linear_dense_total": 4718592, - "linear_nnz": 2191872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1333248, - "linear_dense_total": 4718592, - "linear_nnz": 1824768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1133568, - "linear_dense_total": 4718592, - "linear_nnz": 1526784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 798720, - "linear_dense_total": 4718592, - "linear_nnz": 1585152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 542208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23617536, - "linear_sparsity": 72.19328703703704, - "linear_total": 84934656, - "nnz": 47529298, - "total": 108893186, - "total_sparsity": 56.35236717199184 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.93093661305582, - "f1": 86.7840779199463 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.0001, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 5, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.714137535095215, - "eval_elapsed_time": 20.88254290400073 - }, - "speedup": 2.8142048967064803, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1499136, - "linear_dense_total": 4718592, - "linear_nnz": 2973696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1658880, - "linear_dense_total": 4718592, - "linear_nnz": 2445312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 513024, - "linear_dense_total": 4718592, - "linear_nnz": 1004544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 898560, - "linear_dense_total": 4718592, - "linear_nnz": 1193472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 983040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1993728, - "linear_dense_total": 4718592, - "linear_nnz": 2976768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1277952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1855488, - "linear_dense_total": 4718592, - "linear_nnz": 3133440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1723392, - "linear_dense_total": 4718592, - "linear_nnz": 2214912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1798656, - "linear_dense_total": 4718592, - "linear_nnz": 2191872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1331712, - "linear_dense_total": 4718592, - "linear_nnz": 1823232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1132032, - "linear_dense_total": 4718592, - "linear_nnz": 1525248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 798720, - "linear_dense_total": 4718592, - "linear_nnz": 1585152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 542208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23609856, - "linear_sparsity": 72.2023292824074, - "linear_total": 84934656, - "nnz": 47521613, - "total": 108893186, - "total_sparsity": 56.35942454654601 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.0001_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": { - "eval_metrics": { - "exact_match": 78.94985808893094, - "f1": 86.86276298220868 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.0001, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 5, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 14.452293647766114, - "eval_elapsed_time": 21.616635580081493 - }, - "speedup": 2.670468366198511, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1777152, - "linear_dense_total": 4718592, - "linear_nnz": 3251712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1918464, - "linear_dense_total": 4718592, - "linear_nnz": 2803200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1075200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009152, - "linear_dense_total": 4718592, - "linear_nnz": 1304064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2239488, - "linear_dense_total": 4718592, - "linear_nnz": 3320832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1277952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2075136, - "linear_dense_total": 4718592, - "linear_nnz": 3353088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1880064, - "linear_dense_total": 4718592, - "linear_nnz": 2469888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1929216, - "linear_dense_total": 4718592, - "linear_nnz": 2322432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 2098176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1248768, - "linear_dense_total": 4718592, - "linear_nnz": 1641984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 852480, - "linear_dense_total": 4718592, - "linear_nnz": 1638912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 566784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25846272, - "linear_sparsity": 69.56922743055556, - "linear_total": 84934656, - "nnz": 49759613, - "total": 108893186, - "total_sparsity": 54.304199529987116 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl2.5_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.120151371807, - "f1": 86.7638780082266 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 5, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 2.5 - }, - "speed": { - "cuda_eval_elapsed_time": 13.628461513519287, - "eval_elapsed_time": 20.942012635990977 - }, - "speedup": 2.831896539978916, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1420800, - "linear_dense_total": 4718592, - "linear_nnz": 2895360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1740288, - "linear_dense_total": 4718592, - "linear_nnz": 2330112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 560640, - "linear_dense_total": 4718592, - "linear_nnz": 1052160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 910848, - "linear_dense_total": 4718592, - "linear_nnz": 1205760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2061312, - "linear_dense_total": 4718592, - "linear_nnz": 3240960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1915392, - "linear_dense_total": 4718592, - "linear_nnz": 3095040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1800192, - "linear_dense_total": 4718592, - "linear_nnz": 2291712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1827840, - "linear_dense_total": 4718592, - "linear_nnz": 2221056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1370112, - "linear_dense_total": 4718592, - "linear_nnz": 1861632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1104384, - "linear_dense_total": 4718592, - "linear_nnz": 1300992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1637376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 385536, - "linear_dense_total": 4718592, - "linear_nnz": 582144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23714304, - "linear_sparsity": 72.07935474537037, - "linear_total": 84934656, - "nnz": 47626001, - "total": 108893186, - "total_sparsity": 56.2635617989908 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_it0_fw5_r-l1_rfl5_al1e-05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.61873226111636, - "f1": 86.37059709799422 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 1e-05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 5, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 12.066676223754882, - "eval_elapsed_time": 19.25184288667515 - }, - "speedup": 3.198427826329907, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 728064, - "linear_dense_total": 4718592, - "linear_nnz": 2202624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 998400, - "linear_dense_total": 4718592, - "linear_nnz": 1391616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 314880, - "linear_dense_total": 4718592, - "linear_nnz": 806400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 511488, - "linear_dense_total": 4718592, - "linear_nnz": 904704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1218048, - "linear_dense_total": 4718592, - "linear_nnz": 2397696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1122816, - "linear_dense_total": 4718592, - "linear_nnz": 2302464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1201152, - "linear_dense_total": 4718592, - "linear_nnz": 1692672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1204224, - "linear_dense_total": 4718592, - "linear_nnz": 1794048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 870912, - "linear_dense_total": 4718592, - "linear_nnz": 1460736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 761856, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 503808, - "linear_dense_total": 4718592, - "linear_nnz": 1290240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 227328, - "linear_dense_total": 4718592, - "linear_nnz": 423936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17822208, - "linear_sparsity": 79.0165653935185, - "linear_total": 84934656, - "nnz": 41730197, - "total": 108893186, - "total_sparsity": 61.67786201057612 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "eval_metrics": { - "exact_match": 78.29706717123936, - "f1": 86.2625032125089 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 11.486401634216309, - "eval_elapsed_time": 18.590640037320554 - }, - "speedup": 3.3600072707194957, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1669632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 422400, - "linear_dense_total": 4718592, - "linear_nnz": 913920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118272, - "linear_dense_total": 4718592, - "linear_nnz": 708096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 225792, - "linear_dense_total": 4718592, - "linear_nnz": 619008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1969152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 631296, - "linear_dense_total": 4718592, - "linear_nnz": 1712640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 674304, - "linear_dense_total": 4718592, - "linear_nnz": 1559040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 1219584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 471552, - "linear_dense_total": 4718592, - "linear_nnz": 1257984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 254976, - "linear_dense_total": 4718592, - "linear_nnz": 1090560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 344064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 431616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14106624, - "linear_sparsity": 83.3912037037037, - "linear_total": 84934656, - "nnz": 38065154, - "total": 108893186, - "total_sparsity": 65.04358500448319 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.240302743614, - "f1": 86.19280466015066 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 11.375749713897706, - "eval_elapsed_time": 18.49867358384654 - }, - "speedup": 3.3926900622840255, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1425408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287232, - "linear_dense_total": 4718592, - "linear_nnz": 1712640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416256, - "linear_dense_total": 4718592, - "linear_nnz": 907776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118272, - "linear_dense_total": 4718592, - "linear_nnz": 757248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 225792, - "linear_dense_total": 4718592, - "linear_nnz": 619008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591360, - "linear_dense_total": 4718592, - "linear_nnz": 1967616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 1711104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 933888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 674304, - "linear_dense_total": 4718592, - "linear_nnz": 1608192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 625152, - "linear_dense_total": 4718592, - "linear_nnz": 1214976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 473088, - "linear_dense_total": 4718592, - "linear_nnz": 1161216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 413184, - "linear_dense_total": 4718592, - "linear_nnz": 953856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 254976, - "linear_dense_total": 4718592, - "linear_nnz": 1041408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 482304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14137344, - "linear_sparsity": 83.35503472222221, - "linear_total": 84934656, - "nnz": 38095874, - "total": 108893186, - "total_sparsity": 65.0153738728886 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 78.26868495742669, - "f1": 86.30683282660192 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 11.534610126495362, - "eval_elapsed_time": 18.694298257119954 - }, - "speedup": 3.345964239980557, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1425408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 314880, - "linear_dense_total": 4718592, - "linear_nnz": 1740288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 448512, - "linear_dense_total": 4718592, - "linear_nnz": 940032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 760320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 233472, - "linear_dense_total": 4718592, - "linear_nnz": 626688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615936, - "linear_dense_total": 4718592, - "linear_nnz": 1992192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1728000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 933888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 717312, - "linear_dense_total": 4718592, - "linear_nnz": 1651200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655872, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 1268736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 442368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 877056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1049088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 629760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14509056, - "linear_sparsity": 82.9173900462963, - "linear_total": 84934656, - "nnz": 38467586, - "total": 108893186, - "total_sparsity": 64.67401918059409 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 77.96594134342479, - "f1": 85.91370280008687 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 10.910909118652343, - "eval_elapsed_time": 18.124292518012226 - }, - "speedup": 3.537229811528122, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287232, - "linear_dense_total": 4718592, - "linear_nnz": 1663488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 1282560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 645120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 411648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 626688, - "linear_dense_total": 4718592, - "linear_nnz": 1806336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 566784, - "linear_dense_total": 4718592, - "linear_nnz": 1451520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1385472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 675840, - "linear_dense_total": 4718592, - "linear_nnz": 1265664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 479232, - "linear_dense_total": 4718592, - "linear_nnz": 1069056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416256, - "linear_dense_total": 4718592, - "linear_nnz": 809472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 279552, - "linear_dense_total": 4718592, - "linear_nnz": 1065984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 125952, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13375488, - "linear_sparsity": 84.25202546296296, - "linear_total": 84934656, - "nnz": 37334018, - "total": 108893186, - "total_sparsity": 65.7150099364344 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl10.0_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.69157994323557, - "f1": 85.76954041169931 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 10.839029987335206, - "eval_elapsed_time": 17.95050869276747 - }, - "speedup": 3.5606869849485117, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 282624, - "linear_dense_total": 4718592, - "linear_nnz": 1658880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 1233408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 645120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 411648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 626688, - "linear_dense_total": 4718592, - "linear_nnz": 1806336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1354752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1387008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 678912, - "linear_dense_total": 4718592, - "linear_nnz": 1268736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 480768, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416256, - "linear_dense_total": 4718592, - "linear_nnz": 809472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 279552, - "linear_dense_total": 4718592, - "linear_nnz": 1065984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 125952, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13231104, - "linear_sparsity": 84.42201967592592, - "linear_total": 84934656, - "nnz": 37189634, - "total": 108893186, - "total_sparsity": 65.84760225492897 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "eval_metrics": { - "exact_match": 77.36991485335857, - "f1": 85.60283555208089 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 10.616429515838623, - "eval_elapsed_time": 17.745041345246136 - }, - "speedup": 3.6353458521805493, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 211968, - "linear_dense_total": 4718592, - "linear_nnz": 1588224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 254976, - "linear_dense_total": 4718592, - "linear_nnz": 893952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 605184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 365568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 427008, - "linear_dense_total": 4718592, - "linear_nnz": 1803264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 460800, - "linear_dense_total": 4718592, - "linear_nnz": 1247232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 737280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 520704, - "linear_dense_total": 4718592, - "linear_nnz": 1257984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 480768, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388608, - "linear_dense_total": 4718592, - "linear_nnz": 978432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 314880, - "linear_dense_total": 4718592, - "linear_nnz": 708096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 984576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 288768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11791872, - "linear_sparsity": 86.11653645833334, - "linear_total": 84934656, - "nnz": 35750402, - "total": 108893186, - "total_sparsity": 67.16929377013544 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 77.41721854304636, - "f1": 85.51634639956605 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 10.602042137145997, - "eval_elapsed_time": 17.720320571679622 - }, - "speedup": 3.6402791562343726, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 211968, - "linear_dense_total": 4718592, - "linear_nnz": 1588224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 258048, - "linear_dense_total": 4718592, - "linear_nnz": 897024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 602112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167424, - "linear_dense_total": 4718592, - "linear_nnz": 364032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 428544, - "linear_dense_total": 4718592, - "linear_nnz": 1804800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457728, - "linear_dense_total": 4718592, - "linear_nnz": 1244160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1184256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 1064448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 387072, - "linear_dense_total": 4718592, - "linear_nnz": 976896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 705024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 984576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 288768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11704320, - "linear_sparsity": 86.21961805555556, - "linear_total": 84934656, - "nnz": 35662850, - "total": 108893186, - "total_sparsity": 67.24969549518002 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.32261116367077, - "f1": 85.45260706155949 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 10.591327346801759, - "eval_elapsed_time": 17.679683603346348 - }, - "speedup": 3.6439618700884893, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 210432, - "linear_dense_total": 4718592, - "linear_nnz": 1586688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 887808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 602112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164352, - "linear_dense_total": 4718592, - "linear_nnz": 360960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 427008, - "linear_dense_total": 4718592, - "linear_nnz": 1803264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457728, - "linear_dense_total": 4718592, - "linear_nnz": 1244160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 486912, - "linear_dense_total": 4718592, - "linear_nnz": 1175040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 473088, - "linear_dense_total": 4718592, - "linear_nnz": 1062912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 387072, - "linear_dense_total": 4718592, - "linear_nnz": 976896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 705024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 984576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 93696, - "linear_dense_total": 4718592, - "linear_nnz": 290304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11679744, - "linear_sparsity": 86.24855324074075, - "linear_total": 84934656, - "nnz": 35638274, - "total": 108893186, - "total_sparsity": 67.27226440045568 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "eval_metrics": { - "exact_match": 77.71996215704824, - "f1": 85.77799129804794 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 10.789498657226563, - "eval_elapsed_time": 17.89066500775516 - }, - "speedup": 3.5770330236355736, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 247296, - "linear_dense_total": 4718592, - "linear_nnz": 1721856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 950784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 122880, - "linear_dense_total": 4718592, - "linear_nnz": 614400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 175104, - "linear_dense_total": 4718592, - "linear_nnz": 371712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 480768, - "linear_dense_total": 4718592, - "linear_nnz": 1857024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 491520, - "linear_dense_total": 4718592, - "linear_nnz": 1277952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 884736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 552960, - "linear_dense_total": 4718592, - "linear_nnz": 1437696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 523776, - "linear_dense_total": 4718592, - "linear_nnz": 1113600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 425472, - "linear_dense_total": 4718592, - "linear_nnz": 1015296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 731136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 213504, - "linear_dense_total": 4718592, - "linear_nnz": 999936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 99840, - "linear_dense_total": 4718592, - "linear_nnz": 296448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12387840, - "linear_sparsity": 85.4148582175926, - "linear_total": 84934656, - "nnz": 36346370, - "total": 108893186, - "total_sparsity": 66.62199781720042 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.19583727530747, - "f1": 86.86229967213058 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 12.72295495223999, - "eval_elapsed_time": 19.843479705043137 - }, - "speedup": 3.0334457011164853, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 606720, - "linear_dense_total": 4718592, - "linear_nnz": 2081280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 841728, - "linear_dense_total": 4718592, - "linear_nnz": 1529856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 202752, - "linear_dense_total": 4718592, - "linear_nnz": 595968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 431616, - "linear_dense_total": 4718592, - "linear_nnz": 824832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1061376, - "linear_dense_total": 4718592, - "linear_nnz": 2437632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1033728, - "linear_dense_total": 4718592, - "linear_nnz": 2115072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1092096, - "linear_dense_total": 4718592, - "linear_nnz": 1927680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 809472, - "linear_dense_total": 4718592, - "linear_nnz": 1448448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 1257984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 391680, - "linear_dense_total": 4718592, - "linear_nnz": 1178112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 491520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173568, - "linear_dense_total": 4718592, - "linear_nnz": 665088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17949696, - "linear_sparsity": 78.86646412037037, - "linear_total": 84934656, - "nnz": 41908226, - "total": 108893186, - "total_sparsity": 61.51437244200017 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_it0_fw10_r-l1_rfl5.0_al0.00156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.06338694418164, - "f1": 86.70235473718577 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.00156, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 12.594031677246093, - "eval_elapsed_time": 19.77598567586392 - }, - "speedup": 3.064498644631839, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1474560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 2079744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 840192, - "linear_dense_total": 4718592, - "linear_nnz": 1626624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 344064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 202752, - "linear_dense_total": 4718592, - "linear_nnz": 546816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 393216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 431616, - "linear_dense_total": 4718592, - "linear_nnz": 824832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1058304, - "linear_dense_total": 4718592, - "linear_nnz": 2434560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1081344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1035264, - "linear_dense_total": 4718592, - "linear_nnz": 2116608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 737280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1092096, - "linear_dense_total": 4718592, - "linear_nnz": 1829376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1099776, - "linear_dense_total": 4718592, - "linear_nnz": 1886208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 688128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 809472, - "linear_dense_total": 4718592, - "linear_nnz": 1497600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669696, - "linear_dense_total": 4718592, - "linear_nnz": 1210368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 786432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 391680, - "linear_dense_total": 4718592, - "linear_nnz": 1178112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 589824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 175104, - "linear_dense_total": 4718592, - "linear_nnz": 764928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17995776, - "linear_sparsity": 78.81221064814815, - "linear_total": 84934656, - "nnz": 41954306, - "total": 108893186, - "total_sparsity": 61.4720557446083 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 78.6565752128666, - "f1": 86.45517515140308 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.0156, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 14.27365227508545, - "eval_elapsed_time": 21.38672148110345 - }, - "speedup": 2.7038905153054142, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1459968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419328, - "linear_dense_total": 4718592, - "linear_nnz": 1879296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 930048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1487616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 413184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 150528, - "linear_dense_total": 4718592, - "linear_nnz": 563712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 658176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 919296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1651200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 800256, - "linear_dense_total": 4718592, - "linear_nnz": 2451456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1181952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 777216, - "linear_dense_total": 4718592, - "linear_nnz": 1959168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 996864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1876992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 720384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 886272, - "linear_dense_total": 4718592, - "linear_nnz": 1606656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 595968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1242624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 531456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 494592, - "linear_dense_total": 4718592, - "linear_nnz": 1026048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1029120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 1362432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 673536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 784128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17159424, - "linear_sparsity": 79.7969111689815, - "linear_total": 84934656, - "nnz": 41117954, - "total": 108893186, - "total_sparsity": 62.240103802270966 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.0156_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.52412488174078, - "f1": 86.2110202537131 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.0156, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 14.245073196411132, - "eval_elapsed_time": 21.39737162971869 - }, - "speedup": 2.7093151767794685, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1460736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 420864, - "linear_dense_total": 4718592, - "linear_nnz": 1881600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 930816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1488384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 413184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 562176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 658176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 919296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1636608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 794112, - "linear_dense_total": 4718592, - "linear_nnz": 2430720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1172736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 780288, - "linear_dense_total": 4718592, - "linear_nnz": 1953024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 946944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 875520, - "linear_dense_total": 4718592, - "linear_nnz": 1822464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 719616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 883200, - "linear_dense_total": 4718592, - "linear_nnz": 1602816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 602112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1248768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 530688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 493056, - "linear_dense_total": 4718592, - "linear_nnz": 1023744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1026816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 1360128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 675072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 785664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17078784, - "linear_sparsity": 79.89185474537037, - "linear_total": 84934656, - "nnz": 41037314, - "total": 108893186, - "total_sparsity": 62.31415802270676 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "eval_metrics": { - "exact_match": 77.67265846736045, - "f1": 85.79872940903662 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 12.679624610900879, - "eval_elapsed_time": 19.804423895198852 - }, - "speedup": 3.043811957350131, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1331712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 466944, - "linear_dense_total": 4718592, - "linear_nnz": 1798656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 473088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 649728, - "linear_dense_total": 4718592, - "linear_nnz": 1122816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 417792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 165888, - "linear_dense_total": 4718592, - "linear_nnz": 583680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 301056, - "linear_dense_total": 4718592, - "linear_nnz": 559104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1500672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 2325504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 956160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1790208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 963840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1886976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 598272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 924672, - "linear_dense_total": 4718592, - "linear_nnz": 1522944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 558336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 700416, - "linear_dense_total": 4718592, - "linear_nnz": 1258752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 235008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 502272, - "linear_dense_total": 4718592, - "linear_nnz": 737280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 903936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 382464, - "linear_dense_total": 4718592, - "linear_nnz": 1286400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 533760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 132096, - "linear_dense_total": 4718592, - "linear_nnz": 665856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15538176, - "linear_sparsity": 81.70572916666666, - "linear_total": 84934656, - "nnz": 39496706, - "total": 108893186, - "total_sparsity": 63.72894627217538 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 77.68211920529801, - "f1": 85.79291904118423 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 12.68363911819458, - "eval_elapsed_time": 19.829505565110594 - }, - "speedup": 3.0428485583453524, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1334784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 465408, - "linear_dense_total": 4718592, - "linear_nnz": 1800192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 473856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1118976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 370176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 165888, - "linear_dense_total": 4718592, - "linear_nnz": 536064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 257280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 555264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1497600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 823296, - "linear_dense_total": 4718592, - "linear_nnz": 2320896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 956160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 832512, - "linear_dense_total": 4718592, - "linear_nnz": 1788672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 965376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 926208, - "linear_dense_total": 4718592, - "linear_nnz": 1891584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 600576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 920064, - "linear_dense_total": 4718592, - "linear_nnz": 1520640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 561408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1258752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 230400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 502272, - "linear_dense_total": 4718592, - "linear_nnz": 732672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 897792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 379392, - "linear_dense_total": 4718592, - "linear_nnz": 1277184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 528384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 132096, - "linear_dense_total": 4718592, - "linear_nnz": 660480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15461376, - "linear_sparsity": 81.79615162037037, - "linear_total": 84934656, - "nnz": 39419906, - "total": 108893186, - "total_sparsity": 63.79947410116185 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.4550614947966, - "f1": 85.48800663360532 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 12.51708829498291, - "eval_elapsed_time": 19.654158322140574 - }, - "speedup": 3.08333632357938, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1323264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 470016, - "linear_dense_total": 4718592, - "linear_nnz": 1793280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 470016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1115136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 370176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 165888, - "linear_dense_total": 4718592, - "linear_nnz": 536064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 556032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1496832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 2321664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 960000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 829440, - "linear_dense_total": 4718592, - "linear_nnz": 1789440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 917760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 926208, - "linear_dense_total": 4718592, - "linear_nnz": 1843968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 607488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 918528, - "linear_dense_total": 4718592, - "linear_nnz": 1526016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 567552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1264896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 231168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 499200, - "linear_dense_total": 4718592, - "linear_nnz": 730368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 900096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 379392, - "linear_dense_total": 4718592, - "linear_nnz": 1279488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 533760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 133632, - "linear_dense_total": 4718592, - "linear_nnz": 667392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15423744, - "linear_sparsity": 81.84045862268519, - "linear_total": 84934656, - "nnz": 39382274, - "total": 108893186, - "total_sparsity": 63.83403273736522 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-75000": { - "eval_metrics": { - "exact_match": 78.25922421948913, - "f1": 86.10555694769658 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 13.145666324615478, - "eval_elapsed_time": 20.2730004908517 - }, - "speedup": 2.935902376671653, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1390080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 540672, - "linear_dense_total": 4718592, - "linear_nnz": 1930752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 622848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 724992, - "linear_dense_total": 4718592, - "linear_nnz": 1347840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 322560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 184320, - "linear_dense_total": 4718592, - "linear_nnz": 506880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 412416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348672, - "linear_dense_total": 4718592, - "linear_nnz": 761088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1506816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 2423808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 966144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 898560, - "linear_dense_total": 4718592, - "linear_nnz": 1864704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 965376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 990720, - "linear_dense_total": 4718592, - "linear_nnz": 1956096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 734976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1742592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 561408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 761856, - "linear_dense_total": 4718592, - "linear_nnz": 1323264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 282624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 552960, - "linear_dense_total": 4718592, - "linear_nnz": 835584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 903936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403968, - "linear_dense_total": 4718592, - "linear_nnz": 1307904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 536064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 681984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16682496, - "linear_sparsity": 80.3584346064815, - "linear_total": 84934656, - "nnz": 40641026, - "total": 108893186, - "total_sparsity": 62.67808162027695 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_it0_fw10_r-l1_rfl14.9999_al0.05_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "eval_metrics": { - "exact_match": 78.00378429517502, - "f1": 86.00102110548276 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 64, - "attention_lambda": 0.05, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 12.730052349090576, - "eval_elapsed_time": 19.889838815666735 - }, - "speedup": 3.0317544615696925, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1331712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 470016, - "linear_dense_total": 4718592, - "linear_nnz": 1801728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 471552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655872, - "linear_dense_total": 4718592, - "linear_nnz": 1127424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 319488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 488448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 302592, - "linear_dense_total": 4718592, - "linear_nnz": 560640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1507584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 2341632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 960768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 843264, - "linear_dense_total": 4718592, - "linear_nnz": 1804032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 968448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 930816, - "linear_dense_total": 4718592, - "linear_nnz": 1899264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 598272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 930816, - "linear_dense_total": 4718592, - "linear_nnz": 1529088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 564480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 700416, - "linear_dense_total": 4718592, - "linear_nnz": 1264896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 231168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 506880, - "linear_dense_total": 4718592, - "linear_nnz": 738048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 893952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1277952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 535296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 133632, - "linear_dense_total": 4718592, - "linear_nnz": 668928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15502080, - "linear_sparsity": 81.7482277199074, - "linear_total": 84934656, - "nnz": 39460610, - "total": 108893186, - "total_sparsity": 63.76209435179903 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.4484389782403, - "f1": 86.3547925481507 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 150 - }, - "speed": { - "cuda_eval_elapsed_time": 29.783737594604492, - "eval_elapsed_time": 37.12324417894706 - }, - "speedup": 1.2958210124830911, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 30729, - "linear_attention_total": 2359296, - "linear_dense_nnz": 624455, - "linear_dense_total": 4718592, - "linear_nnz": 655184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 77742, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655389, - "linear_dense_total": 4718592, - "linear_nnz": 733131, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 27892, - "linear_attention_total": 2359296, - "linear_dense_nnz": 61389, - "linear_dense_total": 4718592, - "linear_nnz": 89281, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 20781, - "linear_attention_total": 2359296, - "linear_dense_nnz": 51322, - "linear_dense_total": 4718592, - "linear_nnz": 72103, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 70206, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660173, - "linear_dense_total": 4718592, - "linear_nnz": 730379, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 106339, - "linear_attention_total": 2359296, - "linear_dense_nnz": 628112, - "linear_dense_total": 4718592, - "linear_nnz": 734451, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 81845, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574018, - "linear_dense_total": 4718592, - "linear_nnz": 655863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 68554, - "linear_attention_total": 2359296, - "linear_dense_nnz": 537752, - "linear_dense_total": 4718592, - "linear_nnz": 606306, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 58217, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434629, - "linear_dense_total": 4718592, - "linear_nnz": 492846, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 65705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313684, - "linear_dense_total": 4718592, - "linear_nnz": 379389, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 39483, - "linear_attention_total": 2359296, - "linear_dense_nnz": 203724, - "linear_dense_total": 4718592, - "linear_nnz": 243207, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 46007, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73599, - "linear_dense_total": 4718592, - "linear_nnz": 119606, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 5511746, - "linear_sparsity": 93.51060419906804, - "linear_total": 84934656, - "nnz": 29470276, - "total": 108893186, - "total_sparsity": 72.93652882926945 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.39829706717124, - "f1": 85.66626983371626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 225 - }, - "speed": { - "cuda_eval_elapsed_time": 27.713626304626466, - "eval_elapsed_time": 35.06419681990519 - }, - "speedup": 1.3926143255719736, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 18728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446655, - "linear_dense_total": 4718592, - "linear_nnz": 465383, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 63059, - "linear_attention_total": 2359296, - "linear_dense_nnz": 464338, - "linear_dense_total": 4718592, - "linear_nnz": 527397, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 21311, - "linear_attention_total": 2359296, - "linear_dense_nnz": 43332, - "linear_dense_total": 4718592, - "linear_nnz": 64643, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 17233, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36806, - "linear_dense_total": 4718592, - "linear_nnz": 54039, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 53761, - "linear_attention_total": 2359296, - "linear_dense_nnz": 462731, - "linear_dense_total": 4718592, - "linear_nnz": 516492, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 84624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430348, - "linear_dense_total": 4718592, - "linear_nnz": 514972, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 58345, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384869, - "linear_dense_total": 4718592, - "linear_nnz": 443214, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 50615, - "linear_attention_total": 2359296, - "linear_dense_nnz": 346306, - "linear_dense_total": 4718592, - "linear_nnz": 396921, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 41344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 277660, - "linear_dense_total": 4718592, - "linear_nnz": 319004, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 47420, - "linear_attention_total": 2359296, - "linear_dense_nnz": 201763, - "linear_dense_total": 4718592, - "linear_nnz": 249183, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 27562, - "linear_attention_total": 2359296, - "linear_dense_nnz": 133500, - "linear_dense_total": 4718592, - "linear_nnz": 161062, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 34151, - "linear_attention_total": 2359296, - "linear_dense_nnz": 47554, - "linear_dense_total": 4718592, - "linear_nnz": 81705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 3794015, - "linear_sparsity": 95.5330189363456, - "linear_total": 84934656, - "nnz": 27752545, - "total": 108893186, - "total_sparsity": 74.51397463933142 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 81.40018921475875, - "f1": 88.66263407974378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 37.63941863250732, - "eval_elapsed_time": 44.979358388110995 - }, - "speedup": 1.0253716557683228, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 158912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1993831, - "linear_dense_total": 4718592, - "linear_nnz": 2152743, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 234395, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2030737, - "linear_dense_total": 4718592, - "linear_nnz": 2265132, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 134277, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440264, - "linear_dense_total": 4718592, - "linear_nnz": 574541, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 63309, - "linear_attention_total": 2359296, - "linear_dense_nnz": 269756, - "linear_dense_total": 4718592, - "linear_nnz": 333065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 301048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2114464, - "linear_dense_total": 4718592, - "linear_nnz": 2415512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 358791, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2106776, - "linear_dense_total": 4718592, - "linear_nnz": 2465567, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 398673, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2058594, - "linear_dense_total": 4718592, - "linear_nnz": 2457267, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 367333, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2043244, - "linear_dense_total": 4718592, - "linear_nnz": 2410577, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 344288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1862492, - "linear_dense_total": 4718592, - "linear_nnz": 2206780, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 304514, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514517, - "linear_dense_total": 4718592, - "linear_nnz": 1819031, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 265513, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1099308, - "linear_dense_total": 4718592, - "linear_nnz": 1364821, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 201714, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627276, - "linear_dense_total": 4718592, - "linear_nnz": 828990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21294026, - "linear_sparsity": 74.92893124804085, - "linear_total": 84934656, - "nnz": 45252556, - "total": 108893186, - "total_sparsity": 58.4431701722824 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.40699359564026 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 300 - }, - "speed": { - "cuda_eval_elapsed_time": 25.440285942077637, - "eval_elapsed_time": 32.748252402991056 - }, - "speedup": 1.5170581452285046, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 13195, - "linear_attention_total": 2359296, - "linear_dense_nnz": 344662, - "linear_dense_total": 4718592, - "linear_nnz": 357857, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 53357, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352125, - "linear_dense_total": 4718592, - "linear_nnz": 405482, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 18747, - "linear_attention_total": 2359296, - "linear_dense_nnz": 34723, - "linear_dense_total": 4718592, - "linear_nnz": 53470, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 15957, - "linear_attention_total": 2359296, - "linear_dense_nnz": 30412, - "linear_dense_total": 4718592, - "linear_nnz": 46369, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 43981, - "linear_attention_total": 2359296, - "linear_dense_nnz": 351138, - "linear_dense_total": 4718592, - "linear_nnz": 395119, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 71058, - "linear_attention_total": 2359296, - "linear_dense_nnz": 323059, - "linear_dense_total": 4718592, - "linear_nnz": 394117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 47705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287668, - "linear_dense_total": 4718592, - "linear_nnz": 335373, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 40348, - "linear_attention_total": 2359296, - "linear_dense_nnz": 252178, - "linear_dense_total": 4718592, - "linear_nnz": 292526, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 33002, - "linear_attention_total": 2359296, - "linear_dense_nnz": 205112, - "linear_dense_total": 4718592, - "linear_nnz": 238114, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 38753, - "linear_attention_total": 2359296, - "linear_dense_nnz": 150138, - "linear_dense_total": 4718592, - "linear_nnz": 188891, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 22052, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101313, - "linear_dense_total": 4718592, - "linear_nnz": 123365, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 28498, - "linear_attention_total": 2359296, - "linear_dense_nnz": 35917, - "linear_dense_total": 4718592, - "linear_nnz": 64415, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 2895098, - "linear_sparsity": 96.59138196780358, - "linear_total": 84934656, - "nnz": 26853628, - "total": 108893186, - "total_sparsity": 75.33947808267818 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 88.07603620459668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.31425653076172, - "eval_elapsed_time": 42.675803440622985 - }, - "speedup": 1.092884200230921, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 79341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1362813, - "linear_dense_total": 4718592, - "linear_nnz": 1442154, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 146964, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1411011, - "linear_dense_total": 4718592, - "linear_nnz": 1557975, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 70746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 191871, - "linear_dense_total": 4718592, - "linear_nnz": 262617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 36271, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137408, - "linear_dense_total": 4718592, - "linear_nnz": 173679, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 173655, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1463754, - "linear_dense_total": 4718592, - "linear_nnz": 1637409, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 213353, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1442359, - "linear_dense_total": 4718592, - "linear_nnz": 1655712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 221518, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380230, - "linear_dense_total": 4718592, - "linear_nnz": 1601748, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 179373, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360274, - "linear_dense_total": 4718592, - "linear_nnz": 1539647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 168393, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1183896, - "linear_dense_total": 4718592, - "linear_nnz": 1352289, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 159612, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906603, - "linear_dense_total": 4718592, - "linear_nnz": 1066215, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 127230, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600693, - "linear_dense_total": 4718592, - "linear_nnz": 727923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 105257, - "linear_attention_total": 2359296, - "linear_dense_nnz": 285690, - "linear_dense_total": 4718592, - "linear_nnz": 390947, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13408315, - "linear_sparsity": 84.21337575088313, - "linear_total": 84934656, - "nnz": 37366845, - "total": 108893186, - "total_sparsity": 65.68486388119823 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "eval_metrics": { - "exact_match": 80.22705771050141, - "f1": 88.08154392563726 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.30916271209717, - "eval_elapsed_time": 42.719326278194785 - }, - "speedup": 1.0930418633843273, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 87221, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1434572, - "linear_dense_total": 4718592, - "linear_nnz": 1521793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 157517, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1480327, - "linear_dense_total": 4718592, - "linear_nnz": 1637844, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 75446, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204546, - "linear_dense_total": 4718592, - "linear_nnz": 279992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 38439, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144390, - "linear_dense_total": 4718592, - "linear_nnz": 182829, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 188172, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1535574, - "linear_dense_total": 4718592, - "linear_nnz": 1723746, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 230341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1512620, - "linear_dense_total": 4718592, - "linear_nnz": 1742961, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 240387, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1447041, - "linear_dense_total": 4718592, - "linear_nnz": 1687428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 195780, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427597, - "linear_dense_total": 4718592, - "linear_nnz": 1623377, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 184963, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245019, - "linear_dense_total": 4718592, - "linear_nnz": 1429982, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 172954, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957245, - "linear_dense_total": 4718592, - "linear_nnz": 1130199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 138133, - "linear_attention_total": 2359296, - "linear_dense_nnz": 635763, - "linear_dense_total": 4718592, - "linear_nnz": 773896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 112972, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304891, - "linear_dense_total": 4718592, - "linear_nnz": 417863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14151910, - "linear_sparsity": 83.3378850677867, - "linear_total": 84934656, - "nnz": 38110440, - "total": 108893186, - "total_sparsity": 65.00199746198996 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.89593188268685, - "f1": 87.64967103979136 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 32.98558323669434, - "eval_elapsed_time": 40.38167083170265 - }, - "speedup": 1.170038217254783, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56754, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1054479, - "linear_dense_total": 4718592, - "linear_nnz": 1111233, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 116764, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106103, - "linear_dense_total": 4718592, - "linear_nnz": 1222867, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50915, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121878, - "linear_dense_total": 4718592, - "linear_nnz": 172793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28303, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94314, - "linear_dense_total": 4718592, - "linear_nnz": 122617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 127558, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1136881, - "linear_dense_total": 4718592, - "linear_nnz": 1264439, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 163709, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106395, - "linear_dense_total": 4718592, - "linear_nnz": 1270104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 158018, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044282, - "linear_dense_total": 4718592, - "linear_nnz": 1202300, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 125746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1010449, - "linear_dense_total": 4718592, - "linear_nnz": 1136195, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 110023, - "linear_attention_total": 2359296, - "linear_dense_nnz": 861094, - "linear_dense_total": 4718592, - "linear_nnz": 971117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 113086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632989, - "linear_dense_total": 4718592, - "linear_nnz": 746075, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 81879, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407092, - "linear_dense_total": 4718592, - "linear_nnz": 488971, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 77365, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173330, - "linear_dense_total": 4718592, - "linear_nnz": 250695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9959406, - "linear_sparsity": 88.27403739646628, - "linear_total": 84934656, - "nnz": 33917936, - "total": 108893186, - "total_sparsity": 68.85210429971255 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.8391674550615, - "f1": 87.59923644792065 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 33.06226232147217, - "eval_elapsed_time": 40.42444095481187 - }, - "speedup": 1.1673246261888772, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044542, - "linear_dense_total": 4718592, - "linear_nnz": 1100628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 115328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096450, - "linear_dense_total": 4718592, - "linear_nnz": 1211778, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50374, - "linear_attention_total": 2359296, - "linear_dense_nnz": 120861, - "linear_dense_total": 4718592, - "linear_nnz": 171235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28038, - "linear_attention_total": 2359296, - "linear_dense_nnz": 93754, - "linear_dense_total": 4718592, - "linear_nnz": 121792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 125881, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1127188, - "linear_dense_total": 4718592, - "linear_nnz": 1253069, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 161525, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096986, - "linear_dense_total": 4718592, - "linear_nnz": 1258511, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 155911, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1035794, - "linear_dense_total": 4718592, - "linear_nnz": 1191705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 123921, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001507, - "linear_dense_total": 4718592, - "linear_nnz": 1125428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 108430, - "linear_attention_total": 2359296, - "linear_dense_nnz": 853489, - "linear_dense_total": 4718592, - "linear_nnz": 961919, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 111505, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627123, - "linear_dense_total": 4718592, - "linear_nnz": 738628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 80805, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403383, - "linear_dense_total": 4718592, - "linear_nnz": 484188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 76456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 171492, - "linear_dense_total": 4718592, - "linear_nnz": 247948, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9866829, - "linear_sparsity": 88.38303530657733, - "linear_total": 84934656, - "nnz": 33825359, - "total": 108893186, - "total_sparsity": 68.93712063856779 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.41721854304636, - "f1": 85.55066476449066 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 24.64119245147705, - "eval_elapsed_time": 32.04050999786705 - }, - "speedup": 1.5662550861274434, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 36794, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1688900, - "linear_dense_total": 4718592, - "linear_nnz": 1725694, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 233028, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1726592, - "linear_dense_total": 4718592, - "linear_nnz": 1959620, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 89475, - "linear_attention_total": 2359296, - "linear_dense_nnz": 329600, - "linear_dense_total": 4718592, - "linear_nnz": 419075, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 45791, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148125, - "linear_dense_total": 4718592, - "linear_nnz": 193916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 194318, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1774807, - "linear_dense_total": 4718592, - "linear_nnz": 1969125, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 270153, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1742205, - "linear_dense_total": 4718592, - "linear_nnz": 2012358, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 207935, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1652927, - "linear_dense_total": 4718592, - "linear_nnz": 1860862, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 215427, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1599761, - "linear_dense_total": 4718592, - "linear_nnz": 1815188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 114563, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1404415, - "linear_dense_total": 4718592, - "linear_nnz": 1518978, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 165011, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1142635, - "linear_dense_total": 4718592, - "linear_nnz": 1307646, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 86589, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859553, - "linear_dense_total": 4718592, - "linear_nnz": 946142, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 110020, - "linear_attention_total": 2359296, - "linear_dense_nnz": 421789, - "linear_dense_total": 4718592, - "linear_nnz": 531809, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16260413, - "linear_sparsity": 80.85538487375518, - "linear_total": 84934656, - "nnz": 40218943, - "total": 108893186, - "total_sparsity": 63.065693568741764 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 80.81362346263009, - "f1": 88.10463591853348 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.2810027923584, - "eval_elapsed_time": 34.61669071530923 - }, - "speedup": 1.4146984734806616, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4032512, - "linear_dense_total": 4718592, - "linear_nnz": 4676608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4172800, - "linear_dense_total": 4718592, - "linear_nnz": 4756480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 445440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 890880, - "linear_dense_total": 4718592, - "linear_nnz": 1336320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 272384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 875520, - "linear_dense_total": 4718592, - "linear_nnz": 1147904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4324352, - "linear_dense_total": 4718592, - "linear_nnz": 5113856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1028096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4392960, - "linear_dense_total": 4718592, - "linear_nnz": 5421056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1067008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4359168, - "linear_dense_total": 4718592, - "linear_nnz": 5426176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4333568, - "linear_dense_total": 4718592, - "linear_nnz": 5276672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1003520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4161536, - "linear_dense_total": 4718592, - "linear_nnz": 5165056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4797440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3021824, - "linear_dense_total": 4718592, - "linear_nnz": 3890176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 520192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48734208, - "linear_sparsity": 42.62152777777778, - "linear_total": 84934656, - "nnz": 72671586, - "total": 108893186, - "total_sparsity": 33.26342201062975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 87.95145431777735 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.25869842529297, - "eval_elapsed_time": 34.5833341376856 - }, - "speedup": 1.4158560472410484, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4140032, - "linear_dense_total": 4718592, - "linear_nnz": 4754432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4246528, - "linear_dense_total": 4718592, - "linear_nnz": 4843520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 881664, - "linear_dense_total": 4718592, - "linear_nnz": 1332224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 266240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863232, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 788480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4357120, - "linear_dense_total": 4718592, - "linear_nnz": 5145600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4426752, - "linear_dense_total": 4718592, - "linear_nnz": 5488640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4414464, - "linear_dense_total": 4718592, - "linear_nnz": 5463040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 918528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4399104, - "linear_dense_total": 4718592, - "linear_nnz": 5317632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 998400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4232192, - "linear_dense_total": 4718592, - "linear_nnz": 5230592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939328, - "linear_dense_total": 4718592, - "linear_nnz": 4838400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 819200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3028992, - "linear_dense_total": 4718592, - "linear_nnz": 3848192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1104896, - "linear_dense_total": 4718592, - "linear_nnz": 1620992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 49012736, - "linear_sparsity": 42.29359567901234, - "linear_total": 84934656, - "nnz": 72950082, - "total": 108893186, - "total_sparsity": 33.00767047076757 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "eval_metrics": { - "exact_match": 80.72847682119205, - "f1": 88.08831525592305 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.293812591552737, - "eval_elapsed_time": 34.635603360366076 - }, - "speedup": 1.4140345133503194, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4035584, - "linear_dense_total": 4718592, - "linear_nnz": 4657152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155392, - "linear_dense_total": 4718592, - "linear_nnz": 4759552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 486400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957440, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 891904, - "linear_dense_total": 4718592, - "linear_nnz": 1178624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4284416, - "linear_dense_total": 4718592, - "linear_nnz": 5065728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1068032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4340736, - "linear_dense_total": 4718592, - "linear_nnz": 5408768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1087488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4237312, - "linear_dense_total": 4718592, - "linear_nnz": 5324800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4281344, - "linear_dense_total": 4718592, - "linear_nnz": 5189632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1019904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4149248, - "linear_dense_total": 4718592, - "linear_nnz": 5169152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3827712, - "linear_dense_total": 4718592, - "linear_nnz": 4749312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 851968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3080192, - "linear_dense_total": 4718592, - "linear_nnz": 3932160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278976, - "linear_dense_total": 4718592, - "linear_nnz": 1808384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48687104, - "linear_sparsity": 42.67698688271605, - "linear_total": 84934656, - "nnz": 72624802, - "total": 108893186, - "total_sparsity": 33.306385213120684 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 87.91705961229685 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 26.4900548248291, - "eval_elapsed_time": 33.8130349079147 - }, - "speedup": 1.4569389629649467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3140608, - "linear_dense_total": 4718592, - "linear_nnz": 3775488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 602112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3477504, - "linear_dense_total": 4718592, - "linear_nnz": 4079616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 456704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 494592, - "linear_dense_total": 4718592, - "linear_nnz": 951296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 541696, - "linear_dense_total": 4718592, - "linear_nnz": 831488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1008640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3929088, - "linear_dense_total": 4718592, - "linear_nnz": 4937728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1197056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4089856, - "linear_dense_total": 4718592, - "linear_nnz": 5286912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1181696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3953664, - "linear_dense_total": 4718592, - "linear_nnz": 5135360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1005568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006912, - "linear_dense_total": 4718592, - "linear_nnz": 5012480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1043456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3677184, - "linear_dense_total": 4718592, - "linear_nnz": 4720640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 931840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2777088, - "linear_dense_total": 4718592, - "linear_nnz": 3708928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 862208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1448960, - "linear_dense_total": 4718592, - "linear_nnz": 2311168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 458752, - "linear_dense_total": 4718592, - "linear_nnz": 1058816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 41809920, - "linear_sparsity": 50.774016203703695, - "linear_total": 84934656, - "nnz": 65744386, - "total": 108893186, - "total_sparsity": 39.6248852522324 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.82024597918638, - "f1": 87.30735739624531 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.124949531555178, - "eval_elapsed_time": 31.406295038294047 - }, - "speedup": 1.599770932365684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 889856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2382848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 717824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1850368, - "linear_dense_total": 4718592, - "linear_nnz": 2568192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328704, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 331776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388096, - "linear_dense_total": 4718592, - "linear_nnz": 719872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2802688, - "linear_dense_total": 4718592, - "linear_nnz": 3915776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1297408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2961408, - "linear_dense_total": 4718592, - "linear_nnz": 4258816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1402880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2897920, - "linear_dense_total": 4718592, - "linear_nnz": 4300800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1157120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2873344, - "linear_dense_total": 4718592, - "linear_nnz": 4030464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2473984, - "linear_dense_total": 4718592, - "linear_nnz": 3661824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 979968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1527808, - "linear_dense_total": 4718592, - "linear_nnz": 2507776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 952320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 610304, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 642048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 223232, - "linear_dense_total": 4718592, - "linear_nnz": 865280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31592448, - "linear_sparsity": 62.80381944444444, - "linear_total": 84934656, - "nnz": 55520034, - "total": 108893186, - "total_sparsity": 49.0142257386059 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "eval_metrics": { - "exact_match": 79.82024597918638, - "f1": 87.41794090203474 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 25.313612380981446, - "eval_elapsed_time": 32.61186430603266 - }, - "speedup": 1.524649758576841, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 934912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1777664, - "linear_dense_total": 4718592, - "linear_nnz": 2712576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 738304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 2907136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 530432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 343040, - "linear_dense_total": 4718592, - "linear_nnz": 873472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 378880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 421888, - "linear_dense_total": 4718592, - "linear_nnz": 800768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1162240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3039232, - "linear_dense_total": 4718592, - "linear_nnz": 4201472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1366016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3165184, - "linear_dense_total": 4718592, - "linear_nnz": 4531200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1484800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3182592, - "linear_dense_total": 4718592, - "linear_nnz": 4667392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1414144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3185664, - "linear_dense_total": 4718592, - "linear_nnz": 4599808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1256448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2572288, - "linear_dense_total": 4718592, - "linear_nnz": 3828736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 991232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1668096, - "linear_dense_total": 4718592, - "linear_nnz": 2659328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 966656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 688128, - "linear_dense_total": 4718592, - "linear_nnz": 1654784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 691200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 236544, - "linear_dense_total": 4718592, - "linear_nnz": 927744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34364416, - "linear_sparsity": 59.540171682098766, - "linear_total": 84934656, - "nnz": 58295010, - "total": 108893186, - "total_sparsity": 46.46587895775224 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 79.90539262062441, - "f1": 87.36378709007766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.748493873596193, - "eval_elapsed_time": 32.03074289299548 - }, - "speedup": 1.559464313363606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635328, - "linear_dense_total": 4718592, - "linear_nnz": 2584576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 750592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2048000, - "linear_dense_total": 4718592, - "linear_nnz": 2798592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352256, - "linear_dense_total": 4718592, - "linear_nnz": 862208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 420864, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1123328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2895872, - "linear_dense_total": 4718592, - "linear_nnz": 4019200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1306624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2967552, - "linear_dense_total": 4718592, - "linear_nnz": 4274176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1475584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3105792, - "linear_dense_total": 4718592, - "linear_nnz": 4581376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1285120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2934784, - "linear_dense_total": 4718592, - "linear_nnz": 4219904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1235968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2500608, - "linear_dense_total": 4718592, - "linear_nnz": 3736576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 983040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1604608, - "linear_dense_total": 4718592, - "linear_nnz": 2587648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 661504, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 230400, - "linear_dense_total": 4718592, - "linear_nnz": 880640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 32956416, - "linear_sparsity": 61.19791666666667, - "linear_total": 84934656, - "nnz": 56885634, - "total": 108893186, - "total_sparsity": 47.76015277944021 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.29990539262063, - "f1": 87.09851869948527 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.736273986816407, - "eval_elapsed_time": 32.05209435708821 - }, - "speedup": 1.5602346992898202, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3907584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3661824, - "linear_dense_total": 4718592, - "linear_nnz": 4186112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 595968, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 823296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4031488, - "linear_dense_total": 4718592, - "linear_nnz": 4629504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 930816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4107264, - "linear_dense_total": 4718592, - "linear_nnz": 5038080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 824320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3997696, - "linear_dense_total": 4718592, - "linear_nnz": 4822016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 746496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4027392, - "linear_dense_total": 4718592, - "linear_nnz": 4773888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 670720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3737600, - "linear_dense_total": 4718592, - "linear_nnz": 4408320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 794624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2995200, - "linear_dense_total": 4718592, - "linear_nnz": 3789824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1756160, - "linear_dense_total": 4718592, - "linear_nnz": 2176000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600064, - "linear_dense_total": 4718592, - "linear_nnz": 1011712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40469504, - "linear_sparsity": 52.35218942901234, - "linear_total": 84934656, - "nnz": 64400930, - "total": 108893186, - "total_sparsity": 40.85862268737366 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.22421948912014, - "f1": 87.0664817371684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.675214500427245, - "eval_elapsed_time": 31.986000607255846 - }, - "speedup": 1.5640955422982379, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 501760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 528384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3656704, - "linear_dense_total": 4718592, - "linear_nnz": 4185088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 561152, - "linear_dense_total": 4718592, - "linear_nnz": 874496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 617472, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4121600, - "linear_dense_total": 4718592, - "linear_nnz": 4703232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4144128, - "linear_dense_total": 4718592, - "linear_nnz": 5060608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 833536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4060160, - "linear_dense_total": 4718592, - "linear_nnz": 4893696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 741376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076544, - "linear_dense_total": 4718592, - "linear_nnz": 4817920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3815424, - "linear_dense_total": 4718592, - "linear_nnz": 4459520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 757760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2962432, - "linear_dense_total": 4718592, - "linear_nnz": 3720192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 380928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689600, - "linear_dense_total": 4718592, - "linear_nnz": 2070528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 966656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40452096, - "linear_sparsity": 52.37268518518518, - "linear_total": 84934656, - "nnz": 64383586, - "total": 108893186, - "total_sparsity": 40.874550222086434 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "eval_metrics": { - "exact_match": 79.67833491012298, - "f1": 87.14623278516426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 25.61453672027588, - "eval_elapsed_time": 32.96429116372019 - }, - "speedup": 1.5067378897710322, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 571392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3765248, - "linear_dense_total": 4718592, - "linear_nnz": 4336640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3852288, - "linear_dense_total": 4718592, - "linear_nnz": 4451328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 672768, - "linear_dense_total": 4718592, - "linear_nnz": 1047552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 235520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 942080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 695296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4087808, - "linear_dense_total": 4718592, - "linear_nnz": 4783104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 996352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4050944, - "linear_dense_total": 4718592, - "linear_nnz": 5047296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 923648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4109312, - "linear_dense_total": 4718592, - "linear_nnz": 5032960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 865280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4041728, - "linear_dense_total": 4718592, - "linear_nnz": 4907008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 778240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3858432, - "linear_dense_total": 4718592, - "linear_nnz": 4636672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 883712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3359744, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2305024, - "linear_dense_total": 4718592, - "linear_nnz": 2818048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 462848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1289216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43535360, - "linear_sparsity": 48.742525077160494, - "linear_total": 84934656, - "nnz": 67469538, - "total": 108893186, - "total_sparsity": 38.04062450702838 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.94985808893094, - "f1": 86.768721062838 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 21.874919250488283, - "eval_elapsed_time": 29.121937923133373 - }, - "speedup": 1.7643216216448254, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1844224, - "linear_dense_total": 4718592, - "linear_nnz": 2392064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2172928, - "linear_dense_total": 4718592, - "linear_nnz": 2719744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 392192, - "linear_dense_total": 4718592, - "linear_nnz": 748544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434176, - "linear_dense_total": 4718592, - "linear_nnz": 651264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3196928, - "linear_dense_total": 4718592, - "linear_nnz": 3872768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3111936, - "linear_dense_total": 4718592, - "linear_nnz": 4077568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 896000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3107840, - "linear_dense_total": 4718592, - "linear_nnz": 4003840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3136512, - "linear_dense_total": 4718592, - "linear_nnz": 3832832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2525184, - "linear_dense_total": 4718592, - "linear_nnz": 3280896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 799744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1711104, - "linear_dense_total": 4718592, - "linear_nnz": 2510848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 747520, - "linear_dense_total": 4718592, - "linear_nnz": 1257472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 420864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 681984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30029824, - "linear_sparsity": 64.6436149691358, - "linear_total": 84934656, - "nnz": 53955042, - "total": 108893186, - "total_sparsity": 50.45140657377771 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.8713339640492, - "f1": 85.84893170709621 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.24458102798462, - "eval_elapsed_time": 26.45731420116499 - }, - "speedup": 2.0054680821187447, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 647168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 789504, - "linear_dense_total": 4718592, - "linear_nnz": 1436672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 591872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1798144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 263168, - "linear_dense_total": 4718592, - "linear_nnz": 622592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 271360, - "linear_dense_total": 4718592, - "linear_nnz": 512000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 843776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1739776, - "linear_dense_total": 4718592, - "linear_nnz": 2583552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1118208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1857536, - "linear_dense_total": 4718592, - "linear_nnz": 2975744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1760256, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 791552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718272, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1330176, - "linear_dense_total": 4718592, - "linear_nnz": 2085888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904192, - "linear_dense_total": 4718592, - "linear_nnz": 1731584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 726016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 257024, - "linear_dense_total": 4718592, - "linear_nnz": 983040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 464896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118784, - "linear_dense_total": 4718592, - "linear_nnz": 583680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20496384, - "linear_sparsity": 75.86805555555556, - "linear_total": 84934656, - "nnz": 44413282, - "total": 108893186, - "total_sparsity": 59.21390159343854 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 77.92809839167455, - "f1": 85.97854187426412 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.635457836151122, - "eval_elapsed_time": 26.92565976222977 - }, - "speedup": 1.9655458674518098, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 679936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 869376, - "linear_dense_total": 4718592, - "linear_nnz": 1549312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269760, - "linear_dense_total": 4718592, - "linear_nnz": 1868800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 379904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 282624, - "linear_dense_total": 4718592, - "linear_nnz": 662528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290816, - "linear_dense_total": 4718592, - "linear_nnz": 548864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 875520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1863680, - "linear_dense_total": 4718592, - "linear_nnz": 2739200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1137664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1950720, - "linear_dense_total": 4718592, - "linear_nnz": 3088384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1033216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1787904, - "linear_dense_total": 4718592, - "linear_nnz": 2821120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 850944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1858560, - "linear_dense_total": 4718592, - "linear_nnz": 2709504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 798720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1426432, - "linear_dense_total": 4718592, - "linear_nnz": 2225152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 878592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987136, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 782336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 267264, - "linear_dense_total": 4718592, - "linear_nnz": 1049600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 649216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21777408, - "linear_sparsity": 74.35980902777779, - "linear_total": 84934656, - "nnz": 45695714, - "total": 108893186, - "total_sparsity": 58.036204395746125 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 76.79280983916746, - "f1": 85.3167029862563 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.872496753692626, - "eval_elapsed_time": 24.01387820020318 - }, - "speedup": 2.2874144573134694, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 512000, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 551936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 197632, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 197632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 220160, - "linear_dense_total": 4718592, - "linear_nnz": 417792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 722944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1211392, - "linear_dense_total": 4718592, - "linear_nnz": 1934336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 954368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1397760, - "linear_dense_total": 4718592, - "linear_nnz": 2352128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1238016, - "linear_dense_total": 4718592, - "linear_nnz": 2028544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 584704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1295360, - "linear_dense_total": 4718592, - "linear_nnz": 1880064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 608256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1018880, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 576512, - "linear_dense_total": 4718592, - "linear_nnz": 1316864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 510976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 673792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94208, - "linear_dense_total": 4718592, - "linear_nnz": 451584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15444992, - "linear_sparsity": 81.81544174382715, - "linear_total": 84934656, - "nnz": 39356610, - "total": 108893186, - "total_sparsity": 63.85760078688487 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.04824976348155, - "f1": 85.17930403802184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.85802384185791, - "eval_elapsed_time": 24.0219326200895 - }, - "speedup": 2.289378243109522, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 519168, - "linear_dense_total": 4718592, - "linear_nnz": 1032192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 692224, - "linear_dense_total": 4718592, - "linear_nnz": 1215488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 206848, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 186368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 401408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 683008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1239040, - "linear_dense_total": 4718592, - "linear_nnz": 1922048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 945152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1374208, - "linear_dense_total": 4718592, - "linear_nnz": 2319360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1235968, - "linear_dense_total": 4718592, - "linear_nnz": 2045952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 1847296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1607680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 708608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 578560, - "linear_dense_total": 4718592, - "linear_nnz": 1287168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 473088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158720, - "linear_dense_total": 4718592, - "linear_nnz": 631808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 352256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 90112, - "linear_dense_total": 4718592, - "linear_nnz": 442368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15271936, - "linear_sparsity": 82.0191936728395, - "linear_total": 84934656, - "nnz": 39183362, - "total": 108893186, - "total_sparsity": 64.01669981444019 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.70104068117313, - "f1": 85.88451743537976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 20.68525614929199, - "eval_elapsed_time": 27.97377561684698 - }, - "speedup": 1.8657923656745288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2119680, - "linear_dense_total": 4718592, - "linear_nnz": 2533376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2476032, - "linear_dense_total": 4718592, - "linear_nnz": 2840576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386048, - "linear_dense_total": 4718592, - "linear_nnz": 623616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 460800, - "linear_dense_total": 4718592, - "linear_nnz": 605184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3445760, - "linear_dense_total": 4718592, - "linear_nnz": 3843072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 666624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3402752, - "linear_dense_total": 4718592, - "linear_nnz": 4069376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 492544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3339264, - "linear_dense_total": 4718592, - "linear_nnz": 3831808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194880, - "linear_dense_total": 4718592, - "linear_nnz": 3714048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 448512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2751488, - "linear_dense_total": 4718592, - "linear_nnz": 3200000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 576512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1839104, - "linear_dense_total": 4718592, - "linear_nnz": 2415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1211392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 302080, - "linear_dense_total": 4718592, - "linear_nnz": 619520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29507584, - "linear_sparsity": 65.25848765432099, - "linear_total": 84934656, - "nnz": 53430466, - "total": 108893186, - "total_sparsity": 50.93314103235074 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": { - "eval_metrics": { - "exact_match": 77.68211920529801, - "f1": 86.11161494070976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.577418830871583, - "eval_elapsed_time": 28.903804030269384 - }, - "speedup": 1.7886473497076825, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2385920, - "linear_dense_total": 4718592, - "linear_nnz": 2845696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2798592, - "linear_dense_total": 4718592, - "linear_nnz": 3173376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 254976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416768, - "linear_dense_total": 4718592, - "linear_nnz": 671744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 165888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 466944, - "linear_dense_total": 4718592, - "linear_nnz": 632832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3454976, - "linear_dense_total": 4718592, - "linear_nnz": 3866624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3496960, - "linear_dense_total": 4718592, - "linear_nnz": 4224000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 541696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3412992, - "linear_dense_total": 4718592, - "linear_nnz": 3954688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 545792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3447808, - "linear_dense_total": 4718592, - "linear_nnz": 3993600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933760, - "linear_dense_total": 4718592, - "linear_nnz": 3427328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 641024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2000896, - "linear_dense_total": 4718592, - "linear_nnz": 2641920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 288768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1004544, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 339968, - "linear_dense_total": 4718592, - "linear_nnz": 678912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31404032, - "linear_sparsity": 63.025655864197525, - "linear_total": 84934656, - "nnz": 55329122, - "total": 108893186, - "total_sparsity": 49.1895461668281 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 77.96594134342479, - "f1": 86.01491496793933 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.28239717102051, - "eval_elapsed_time": 28.641465611290187 - }, - "speedup": 1.8134420053923117, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 435200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2226176, - "linear_dense_total": 4718592, - "linear_nnz": 2661376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2727936, - "linear_dense_total": 4718592, - "linear_nnz": 3087360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 252928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 664576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 158720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 487424, - "linear_dense_total": 4718592, - "linear_nnz": 646144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 421888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3473408, - "linear_dense_total": 4718592, - "linear_nnz": 3895296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 710656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3451904, - "linear_dense_total": 4718592, - "linear_nnz": 4162560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3437568, - "linear_dense_total": 4718592, - "linear_nnz": 3985408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 556032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3325952, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2828288, - "linear_dense_total": 4718592, - "linear_nnz": 3340288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1991680, - "linear_dense_total": 4718592, - "linear_nnz": 2614272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 276480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 979968, - "linear_dense_total": 4718592, - "linear_nnz": 1256448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 337920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330752, - "linear_dense_total": 4718592, - "linear_nnz": 668672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30864384, - "linear_sparsity": 63.66102430555556, - "linear_total": 84934656, - "nnz": 54788706, - "total": 108893186, - "total_sparsity": 49.68582699012958 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "eval_metrics": { - "exact_match": 76.92526017029329, - "f1": 85.21713644985097 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.63341423416138, - "eval_elapsed_time": 24.82955563813448 - }, - "speedup": 2.1887078981336363, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1469440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 396288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1296384, - "linear_dense_total": 4718592, - "linear_nnz": 1692672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 308224, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 152576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 315392, - "linear_dense_total": 4718592, - "linear_nnz": 467968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2113536, - "linear_dense_total": 4718592, - "linear_nnz": 2692096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1973248, - "linear_dense_total": 4718592, - "linear_nnz": 2728960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 565248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966080, - "linear_dense_total": 4718592, - "linear_nnz": 2531328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1887232, - "linear_dense_total": 4718592, - "linear_nnz": 2434048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 476160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1502208, - "linear_dense_total": 4718592, - "linear_nnz": 1978368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000448, - "linear_dense_total": 4718592, - "linear_nnz": 1638400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 310272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 310272, - "linear_dense_total": 4718592, - "linear_nnz": 620544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 457728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19257344, - "linear_sparsity": 77.3268711419753, - "linear_total": 84934656, - "nnz": 43172098, - "total": 108893186, - "total_sparsity": 60.35371946964616 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.08609271523179, - "f1": 85.20287591064626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.564620765686037, - "eval_elapsed_time": 24.740368818864226 - }, - "speedup": 2.1972801758844964, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1463296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 399360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1300480, - "linear_dense_total": 4718592, - "linear_nnz": 1699840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305152, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 329728, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 544768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2180096, - "linear_dense_total": 4718592, - "linear_nnz": 2724864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 731136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1939456, - "linear_dense_total": 4718592, - "linear_nnz": 2670592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 557056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1941504, - "linear_dense_total": 4718592, - "linear_nnz": 2498560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1880064, - "linear_dense_total": 4718592, - "linear_nnz": 2407424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 472064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1456128, - "linear_dense_total": 4718592, - "linear_nnz": 1928192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 607232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 977920, - "linear_dense_total": 4718592, - "linear_nnz": 1585152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 317440, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 308224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 455680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19060736, - "linear_sparsity": 77.55835262345678, - "linear_total": 84934656, - "nnz": 42975330, - "total": 108893186, - "total_sparsity": 60.53441764482857 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "eval_metrics": { - "exact_match": 77.01986754966887, - "f1": 85.2617013700351 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 18.277880432128907, - "eval_elapsed_time": 25.53750513214618 - }, - "speedup": 2.1115354785629177, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1234944, - "linear_dense_total": 4718592, - "linear_nnz": 1715200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 400384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495040, - "linear_dense_total": 4718592, - "linear_nnz": 1895424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 267264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 593920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 163840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 594944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2417664, - "linear_dense_total": 4718592, - "linear_nnz": 3012608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 813056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281472, - "linear_dense_total": 4718592, - "linear_nnz": 3094528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2163712, - "linear_dense_total": 4718592, - "linear_nnz": 2762752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 562176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2145280, - "linear_dense_total": 4718592, - "linear_nnz": 2707456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 531456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2233344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 678912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1062912, - "linear_dense_total": 4718592, - "linear_nnz": 1741824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370688, - "linear_dense_total": 4718592, - "linear_nnz": 709632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164864, - "linear_dense_total": 4718592, - "linear_nnz": 524288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21492736, - "linear_sparsity": 74.6949749228395, - "linear_total": 84934656, - "nnz": 45409666, - "total": 108893186, - "total_sparsity": 58.29889117212532 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.22056943761015 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.92396342086792, - "eval_elapsed_time": 25.119796799961478 - }, - "speedup": 2.153228730674472, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 458752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1139712, - "linear_dense_total": 4718592, - "linear_nnz": 1598464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 398336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427456, - "linear_dense_total": 4718592, - "linear_nnz": 1825792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 598016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 347136, - "linear_dense_total": 4718592, - "linear_nnz": 509952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2257920, - "linear_dense_total": 4718592, - "linear_nnz": 2854912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2123776, - "linear_dense_total": 4718592, - "linear_nnz": 2905088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 620544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2023424, - "linear_dense_total": 4718592, - "linear_nnz": 2643968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 573440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970176, - "linear_dense_total": 4718592, - "linear_nnz": 2543616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 460800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1588224, - "linear_dense_total": 4718592, - "linear_nnz": 2049024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1069056, - "linear_dense_total": 4718592, - "linear_nnz": 1708032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 359424, - "linear_dense_total": 4718592, - "linear_nnz": 666624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 327680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 161792, - "linear_dense_total": 4718592, - "linear_nnz": 489472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20392960, - "linear_sparsity": 75.98982445987654, - "linear_total": 84934656, - "nnz": 44308674, - "total": 108893186, - "total_sparsity": 59.309966373837206 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "eval_metrics": { - "exact_match": 81.51371807000946, - "f1": 88.67903677006836 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.30978426361084, - "eval_elapsed_time": 38.71227815328166 - }, - "speedup": 1.232662374177603, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 804864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5262336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 771072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4464640, - "linear_dense_total": 4718592, - "linear_nnz": 5235712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607680, - "linear_dense_total": 4718592, - "linear_nnz": 2222080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 389120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1230848, - "linear_dense_total": 4718592, - "linear_nnz": 1619968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1152000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4517888, - "linear_dense_total": 4718592, - "linear_nnz": 5669888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1312768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4562944, - "linear_dense_total": 4718592, - "linear_nnz": 5875712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1501184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4555776, - "linear_dense_total": 4718592, - "linear_nnz": 6056960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1377280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4520960, - "linear_dense_total": 4718592, - "linear_nnz": 5898240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1357824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4473856, - "linear_dense_total": 4718592, - "linear_nnz": 5831680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1192960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4290560, - "linear_dense_total": 4718592, - "linear_nnz": 5483520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4958208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 718848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2518016, - "linear_dense_total": 4718592, - "linear_nnz": 3236864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57351168, - "linear_sparsity": 32.47612847222222, - "linear_total": 84934656, - "nnz": 81295202, - "total": 108893186, - "total_sparsity": 25.344087186502197 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.73698799207777 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.817585739135744, - "eval_elapsed_time": 39.2419764213264 - }, - "speedup": 1.2129893613486789, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4316160, - "linear_dense_total": 4718592, - "linear_nnz": 5237760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 829440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5140480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 671744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2001920, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 409600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304576, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1221632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4419584, - "linear_dense_total": 4718592, - "linear_nnz": 5641216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1386496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4429824, - "linear_dense_total": 4718592, - "linear_nnz": 5816320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1540096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5997568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1548288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4420608, - "linear_dense_total": 4718592, - "linear_nnz": 5968896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1364992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4320256, - "linear_dense_total": 4718592, - "linear_nnz": 5685248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1272832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4186112, - "linear_dense_total": 4718592, - "linear_nnz": 5458944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1173504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3787776, - "linear_dense_total": 4718592, - "linear_nnz": 4961280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2839552, - "linear_dense_total": 4718592, - "linear_nnz": 3566592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57862144, - "linear_sparsity": 31.87451774691358, - "linear_total": 84934656, - "nnz": 81807426, - "total": 108893186, - "total_sparsity": 24.873695953757846 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.66887417218543, - "f1": 87.3881230572442 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.326403350830077, - "eval_elapsed_time": 24.523588876239955 - }, - "speedup": 2.227490161916501, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 643072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1539584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 463872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 576000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 591872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1051648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2068480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1257472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2334208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1315840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 2473984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2078208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 1820160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 925696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 1555456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 663040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18215424, - "linear_sparsity": 78.55360243055556, - "linear_total": 84934656, - "nnz": 42128141, - "total": 108893186, - "total_sparsity": 61.31241765669342 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.14755939306319 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.057066314697266, - "eval_elapsed_time": 24.182081679347903 - }, - "speedup": 2.262663009764823, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1107968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 478208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 551936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1098752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 1717760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1309696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 1967104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1362944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2067968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1074176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 1742336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1049600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 1565696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 958464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1342464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1153536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 729088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15622656, - "linear_sparsity": 81.6062644675926, - "linear_total": 84934656, - "nnz": 39533983, - "total": 108893186, - "total_sparsity": 63.694713643514845 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.67549668874172, - "f1": 86.51098653495667 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.252509830474853, - "eval_elapsed_time": 24.480217491276562 - }, - "speedup": 2.2370306340702912, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 864256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 991744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 216576, - "linear_dense_total": 4718592, - "linear_nnz": 965120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 502784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 53760, - "linear_dense_total": 4718592, - "linear_nnz": 556544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 360448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 81408, - "linear_dense_total": 4718592, - "linear_nnz": 441856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 324096, - "linear_dense_total": 4718592, - "linear_nnz": 1487360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1389568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377856, - "linear_dense_total": 4718592, - "linear_nnz": 1767424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1449984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 1864704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1349632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 364032, - "linear_dense_total": 4718592, - "linear_nnz": 1713664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1481216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 964608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 225792, - "linear_dense_total": 4718592, - "linear_nnz": 1190400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1063936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 1191424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 708608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14360064, - "linear_sparsity": 83.0928096064815, - "linear_total": 84934656, - "nnz": 38271273, - "total": 108893186, - "total_sparsity": 64.85429951512302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.3349101229896, - "f1": 86.4116267700138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.760263885498047, - "eval_elapsed_time": 21.897933847736567 - }, - "speedup": 2.6147495264830645, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 522240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 933888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1116160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 222208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 421888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1374720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1692160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 825344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1659392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 672768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1207296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 785408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1235456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 514048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12816896, - "linear_sparsity": 84.9096981095679, - "linear_total": 84934656, - "nnz": 36724619, - "total": 108893186, - "total_sparsity": 66.2746399944621 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.37275307473983, - "f1": 86.39441106336629 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.746898548126222, - "eval_elapsed_time": 21.86237431317568 - }, - "speedup": 2.61711931355729, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 930816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 536576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 443904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 226304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 425984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 667648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1366528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 967680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1681920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1669632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 668672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1412096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1221632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1237504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 757760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12803584, - "linear_sparsity": 84.92537133487654, - "linear_total": 84934656, - "nnz": 36711275, - "total": 108893186, - "total_sparsity": 66.28689420474849 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": { - "eval_metrics": { - "exact_match": 77.8240302743614, - "f1": 86.11992485005756 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.268565601348877, - "eval_elapsed_time": 21.374552259687334 - }, - "speedup": 2.704854439028025, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 732160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 535552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 835072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 422912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 239616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 336384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1128960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1111040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 1551872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 892928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1389056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 663552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 662528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1000448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 801792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 803328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 498688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10866176, - "linear_sparsity": 87.20642843364197, - "linear_total": 84934656, - "nnz": 34772839, - "total": 108893186, - "total_sparsity": 68.06702028169144 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": { - "eval_metrics": { - "exact_match": 76.9914853358562, - "f1": 85.26341062121247 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.846498733520509, - "eval_elapsed_time": 21.962527931667864 - }, - "speedup": 2.599561936999493, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 674816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 129024, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36864, - "linear_dense_total": 4718592, - "linear_nnz": 432128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 238592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 52224, - "linear_dense_total": 4718592, - "linear_nnz": 290816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 937984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 1137664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1193984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1458176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1057792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 278016, - "linear_dense_total": 4718592, - "linear_nnz": 1335808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 843264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 188928, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 830464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 979456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 753664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 79872, - "linear_dense_total": 4718592, - "linear_nnz": 833536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 46080, - "linear_dense_total": 4718592, - "linear_nnz": 478208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10163200, - "linear_sparsity": 88.03409529320987, - "linear_total": 84934656, - "nnz": 34069864, - "total": 108893186, - "total_sparsity": 68.71258409134985 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-25--2021-01-23--20-20-19/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.06054872280038, - "f1": 86.20063710644014 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.394198833465577, - "eval_elapsed_time": 21.72890411503613 - }, - "speedup": 2.681246344578876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 597504, - "linear_dense_total": 4718592, - "linear_nnz": 1053184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 854016, - "linear_dense_total": 4718592, - "linear_nnz": 1218560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118272, - "linear_dense_total": 4718592, - "linear_nnz": 404992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276480, - "linear_dense_total": 4718592, - "linear_nnz": 439296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1503232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 964608, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1047552, - "linear_dense_total": 4718592, - "linear_nnz": 1626112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 992256, - "linear_dense_total": 4718592, - "linear_nnz": 1592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 775680, - "linear_dense_total": 4718592, - "linear_nnz": 1322496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615936, - "linear_dense_total": 4718592, - "linear_nnz": 1302016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 342528, - "linear_dense_total": 4718592, - "linear_nnz": 678400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 358400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 493568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13348352, - "linear_sparsity": 84.28397472993827, - "linear_total": 84934656, - "nnz": 37255475, - "total": 108893186, - "total_sparsity": 65.78713841653968 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.11447492904446, - "f1": 85.59611837921153 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.499527885437011, - "eval_elapsed_time": 20.856850353069603 - }, - "speedup": 2.8589439077351635, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 722432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 309248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 382976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 198656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 352256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1136128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 779264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1362944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 575488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1240576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 590848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1054720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1066496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 388096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 623104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 452608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10498048, - "linear_sparsity": 87.63985339506173, - "linear_total": 84934656, - "nnz": 34403512, - "total": 108893186, - "total_sparsity": 68.40618475429675 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110660": { - "eval_metrics": { - "exact_match": 77.37937559129612, - "f1": 85.69020560735045 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.492438529968261, - "eval_elapsed_time": 20.86975116888061 - }, - "speedup": 2.860446087610368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 741888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 495616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 954880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 296960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 370688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 194560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 348160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1141248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1373184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 582656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1247744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1163264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1042432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 715776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 375808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 610816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 347136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10516480, - "linear_sparsity": 87.61815200617285, - "linear_total": 84934656, - "nnz": 34421912, - "total": 108893186, - "total_sparsity": 68.3892874619354 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 88.07285498416482 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.669778549194337, - "eval_elapsed_time": 27.982159624807537 - }, - "speedup": 1.8671894773093938, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 991232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2098688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 730112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 624640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 787456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 897536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1225728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2731008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1433600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2977280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1566720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3176448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3081216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2487808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1166336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1148928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1617408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 738304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 945664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24807424, - "linear_sparsity": 70.79234182098766, - "linear_total": 84934656, - "nnz": 48725622, - "total": 108893186, - "total_sparsity": 55.25374562922606 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.70009460737937, - "f1": 88.04831949879843 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.71169916152954, - "eval_elapsed_time": 28.054355942178518 - }, - "speedup": 1.863410273796239, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2086400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 1995264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 615424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 403456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 881152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1232896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2738176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1455104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2998784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1598464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3208192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3104768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1373184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1165312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2002432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1631744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 947712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24871936, - "linear_sparsity": 70.71638695987654, - "linear_total": 84934656, - "nnz": 48790134, - "total": 108893186, - "total_sparsity": 55.19450225287742 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": { - "eval_metrics": { - "exact_match": 80.79470198675497, - "f1": 88.10958975740277 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.632953029632567, - "eval_elapsed_time": 27.97396031860262 - }, - "speedup": 1.8705220212512832, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 976896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 733184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2018816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 805888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 904192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1252352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 2776064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1437696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 2993664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1545216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3162624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1574912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3089408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1370112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2505216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2026496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1190912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 1665536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 957440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25008128, - "linear_sparsity": 70.55603780864197, - "linear_total": 84934656, - "nnz": 48926434, - "total": 108893186, - "total_sparsity": 55.069333723048565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.58656575212866, - "f1": 88.06903108265608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.595643711090087, - "eval_elapsed_time": 26.718373194802552 - }, - "speedup": 1.9695394330694393, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1055744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1582592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 652288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 682496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1316864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2190848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1468416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1651712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 2697728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1616896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2603008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2102272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1265664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 1824768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1212416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 863232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20786688, - "linear_sparsity": 75.52625868055556, - "linear_total": 84934656, - "nnz": 44702229, - "total": 108893186, - "total_sparsity": 58.94855257518133 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.10406811731315, - "f1": 87.56487698206614 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.459814723968506, - "eval_elapsed_time": 26.6199238197878 - }, - "speedup": 1.9832867657180042, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1210368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 210432, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403968, - "linear_dense_total": 4718592, - "linear_nnz": 1381888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 712704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 69120, - "linear_dense_total": 4718592, - "linear_nnz": 781824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 136704, - "linear_dense_total": 4718592, - "linear_nnz": 580096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1500160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 513024, - "linear_dense_total": 4718592, - "linear_nnz": 2013184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1526784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 2115072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1734656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660480, - "linear_dense_total": 4718592, - "linear_nnz": 2395136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1659904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 551424, - "linear_dense_total": 4718592, - "linear_nnz": 2211328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1486848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 456192, - "linear_dense_total": 4718592, - "linear_nnz": 1943040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1254400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 336384, - "linear_dense_total": 4718592, - "linear_nnz": 1590784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1267712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173568, - "linear_dense_total": 4718592, - "linear_nnz": 1441280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 760832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 837632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18712064, - "linear_sparsity": 77.96887056327161, - "linear_total": 84934656, - "nnz": 42626625, - "total": 108893186, - "total_sparsity": 60.85464429335368 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l10-dl1--2021-01-24--15-45-00/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.00946073793756, - "f1": 87.65780769915727 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.317300163269042, - "eval_elapsed_time": 33.56822411296889 - }, - "speedup": 1.4665027478478643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1657600, - "linear_dense_total": 4718592, - "linear_nnz": 2378496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 719872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2046464, - "linear_dense_total": 4718592, - "linear_nnz": 2766336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 272128, - "linear_dense_total": 4718592, - "linear_nnz": 722688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 307456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 619264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1058304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2721792, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1227776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2707200, - "linear_dense_total": 4718592, - "linear_nnz": 3934976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1367808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2789888, - "linear_dense_total": 4718592, - "linear_nnz": 4157696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1258240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2672384, - "linear_dense_total": 4718592, - "linear_nnz": 3930624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1130496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2136064, - "linear_dense_total": 4718592, - "linear_nnz": 3266560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491200, - "linear_dense_total": 4718592, - "linear_nnz": 2480128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 888576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 653568, - "linear_dense_total": 4718592, - "linear_nnz": 1542144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 567296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 249088, - "linear_dense_total": 4718592, - "linear_nnz": 816384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30395392, - "linear_sparsity": 64.21320408950618, - "linear_total": 84934656, - "nnz": 54326914, - "total": 108893186, - "total_sparsity": 50.10990494850615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l20-dl1--2021-01-24--15-45-27/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.7511825922422, - "f1": 86.70333537174074 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 21.08596396636963, - "eval_elapsed_time": 28.310240568593144 - }, - "speedup": 1.8303357184393354, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 484864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 821248, - "linear_dense_total": 4718592, - "linear_nnz": 1306112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1247488, - "linear_dense_total": 4718592, - "linear_nnz": 1851648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 343296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 200704, - "linear_dense_total": 4718592, - "linear_nnz": 544000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 215296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 216320, - "linear_dense_total": 4718592, - "linear_nnz": 431616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 813312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1803264, - "linear_dense_total": 4718592, - "linear_nnz": 2616576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1050880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1707520, - "linear_dense_total": 4718592, - "linear_nnz": 2758400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1007104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1634816, - "linear_dense_total": 4718592, - "linear_nnz": 2641920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 769792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1574400, - "linear_dense_total": 4718592, - "linear_nnz": 2344192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 749056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1194496, - "linear_dense_total": 4718592, - "linear_nnz": 1943552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 765440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 861440, - "linear_dense_total": 4718592, - "linear_nnz": 1626880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 645888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 319488, - "linear_dense_total": 4718592, - "linear_nnz": 965376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 368128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 514048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19544320, - "linear_sparsity": 76.98899257330247, - "linear_total": 84934656, - "nnz": 43464610, - "total": 108893186, - "total_sparsity": 60.08509660099393 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l10-dl1--2021-01-24--15-47-42/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.9271523178808, - "f1": 88.21768668110452 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 33.44704815673828, - "eval_elapsed_time": 40.718972705770284 - }, - "speedup": 1.1538953400165994, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 528912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2409360, - "linear_dense_total": 4718592, - "linear_nnz": 2938272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 618448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2534112, - "linear_dense_total": 4718592, - "linear_nnz": 3152560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 357616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 309216, - "linear_dense_total": 4718592, - "linear_nnz": 666832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 219536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276672, - "linear_dense_total": 4718592, - "linear_nnz": 496208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 835904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670704, - "linear_dense_total": 4718592, - "linear_nnz": 3506608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 958400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670800, - "linear_dense_total": 4718592, - "linear_nnz": 3629200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1091248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2620432, - "linear_dense_total": 4718592, - "linear_nnz": 3711680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1029984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2535968, - "linear_dense_total": 4718592, - "linear_nnz": 3565952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 964544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2286960, - "linear_dense_total": 4718592, - "linear_nnz": 3251504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 813552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1727488, - "linear_dense_total": 4718592, - "linear_nnz": 2541040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 744336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096768, - "linear_dense_total": 4718592, - "linear_nnz": 1841104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386800, - "linear_dense_total": 4718592, - "linear_nnz": 860464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30161424, - "linear_sparsity": 64.48867232711225, - "linear_total": 84934656, - "nnz": 54106194, - "total": 108893186, - "total_sparsity": 50.31259899035372 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.72563859981078, - "f1": 87.37325813950282 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.62903995513916, - "eval_elapsed_time": 37.18844554480165 - }, - "speedup": 1.302586687378539, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492736, - "linear_dense_total": 4718592, - "linear_nnz": 1861936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640864, - "linear_dense_total": 4718592, - "linear_nnz": 2108384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173136, - "linear_dense_total": 4718592, - "linear_nnz": 415888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168992, - "linear_dense_total": 4718592, - "linear_nnz": 326432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 642896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1775952, - "linear_dense_total": 4718592, - "linear_nnz": 2418848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758400, - "linear_dense_total": 4718592, - "linear_nnz": 2503152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1673184, - "linear_dense_total": 4718592, - "linear_nnz": 2447312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581904, - "linear_dense_total": 4718592, - "linear_nnz": 2218640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321456, - "linear_dense_total": 4718592, - "linear_nnz": 1927200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1454768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524512, - "linear_dense_total": 4718592, - "linear_nnz": 1011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 307184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180400, - "linear_dense_total": 4718592, - "linear_nnz": 487584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19181376, - "linear_sparsity": 77.41631401909721, - "linear_total": 84934656, - "nnz": 43119238, - "total": 108893186, - "total_sparsity": 60.40226245194075 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.77294228949859, - "f1": 87.35885990249378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.662232711791994, - "eval_elapsed_time": 37.211166836321354 - }, - "speedup": 1.3011290613342195, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492400, - "linear_dense_total": 4718592, - "linear_nnz": 1861424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640528, - "linear_dense_total": 4718592, - "linear_nnz": 2107600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173264, - "linear_dense_total": 4718592, - "linear_nnz": 415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168800, - "linear_dense_total": 4718592, - "linear_nnz": 326080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 643248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1776032, - "linear_dense_total": 4718592, - "linear_nnz": 2419280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758000, - "linear_dense_total": 4718592, - "linear_nnz": 2502560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 773760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1672784, - "linear_dense_total": 4718592, - "linear_nnz": 2446544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581568, - "linear_dense_total": 4718592, - "linear_nnz": 2217776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321040, - "linear_dense_total": 4718592, - "linear_nnz": 1926704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906384, - "linear_dense_total": 4718592, - "linear_nnz": 1454544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524352, - "linear_dense_total": 4718592, - "linear_nnz": 1010816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 306864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180544, - "linear_dense_total": 4718592, - "linear_nnz": 487408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19176352, - "linear_sparsity": 77.42222915461035, - "linear_total": 84934656, - "nnz": 43114218, - "total": 108893186, - "total_sparsity": 60.40687247409585 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-105000": { - "eval_metrics": { - "exact_match": 77.84295175023652, - "f1": 85.93146728512978 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.637864067077636, - "eval_elapsed_time": 32.05906807305291 - }, - "speedup": 1.5664666750452154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 246400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 777312, - "linear_dense_total": 4718592, - "linear_nnz": 1023712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904544, - "linear_dense_total": 4718592, - "linear_nnz": 1286416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 169216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118720, - "linear_dense_total": 4718592, - "linear_nnz": 287936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110384, - "linear_dense_total": 4718592, - "linear_nnz": 224368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1024768, - "linear_dense_total": 4718592, - "linear_nnz": 1485456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 556080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006160, - "linear_dense_total": 4718592, - "linear_nnz": 1562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 487760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 920208, - "linear_dense_total": 4718592, - "linear_nnz": 1407968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 403424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859936, - "linear_dense_total": 4718592, - "linear_nnz": 1263360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 380560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 679056, - "linear_dense_total": 4718592, - "linear_nnz": 1059616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 400704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463040, - "linear_dense_total": 4718592, - "linear_nnz": 863744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 266832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283984, - "linear_dense_total": 4718592, - "linear_nnz": 550816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 225120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 102560, - "linear_dense_total": 4718592, - "linear_nnz": 327680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11343312, - "linear_sparsity": 86.64466010199654, - "linear_total": 84934656, - "nnz": 35270510, - "total": 108893186, - "total_sparsity": 67.60999352154138 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110000": { - "eval_metrics": { - "exact_match": 77.47398297067171, - "f1": 85.88482767255138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.631753623962403, - "eval_elapsed_time": 32.0392144843936 - }, - "speedup": 1.5668552712310941, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 761056, - "linear_dense_total": 4718592, - "linear_nnz": 1005072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887392, - "linear_dense_total": 4718592, - "linear_nnz": 1265136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118128, - "linear_dense_total": 4718592, - "linear_nnz": 284976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110272, - "linear_dense_total": 4718592, - "linear_nnz": 223360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009968, - "linear_dense_total": 4718592, - "linear_nnz": 1463488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 989184, - "linear_dense_total": 4718592, - "linear_nnz": 1538448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 907024, - "linear_dense_total": 4718592, - "linear_nnz": 1387552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846224, - "linear_dense_total": 4718592, - "linear_nnz": 1243792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 670144, - "linear_dense_total": 4718592, - "linear_nnz": 1044112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 394160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457328, - "linear_dense_total": 4718592, - "linear_nnz": 851488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280864, - "linear_dense_total": 4718592, - "linear_nnz": 543232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 222176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101472, - "linear_dense_total": 4718592, - "linear_nnz": 323648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11174304, - "linear_sparsity": 86.84364601417825, - "linear_total": 84934656, - "nnz": 35101310, - "total": 108893186, - "total_sparsity": 67.7653751447772 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110660": { - "eval_metrics": { - "exact_match": 77.4077578051088, - "f1": 85.78500582028688 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.588402084350587, - "eval_elapsed_time": 32.04897632403299 - }, - "speedup": 1.5696177764204813, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 760240, - "linear_dense_total": 4718592, - "linear_nnz": 1004320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887488, - "linear_dense_total": 4718592, - "linear_nnz": 1264816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 117888, - "linear_dense_total": 4718592, - "linear_nnz": 284528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110240, - "linear_dense_total": 4718592, - "linear_nnz": 223296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009680, - "linear_dense_total": 4718592, - "linear_nnz": 1463360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 988176, - "linear_dense_total": 4718592, - "linear_nnz": 1537232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1386720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846544, - "linear_dense_total": 4718592, - "linear_nnz": 1244032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669920, - "linear_dense_total": 4718592, - "linear_nnz": 1043552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457008, - "linear_dense_total": 4718592, - "linear_nnz": 850736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280816, - "linear_dense_total": 4718592, - "linear_nnz": 543088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 221824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101360, - "linear_dense_total": 4718592, - "linear_nnz": 323184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11168864, - "linear_sparsity": 86.85005093798226, - "linear_total": 84934656, - "nnz": 35095854, - "total": 108893186, - "total_sparsity": 67.77038555929478 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.02730364897265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.553753234863283, - "eval_elapsed_time": 36.97127141384408 - }, - "speedup": 1.3059049623464731, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2103872, - "linear_dense_total": 4718592, - "linear_nnz": 2737920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2320064, - "linear_dense_total": 4718592, - "linear_nnz": 2982272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 398848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 698368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 298560, - "linear_dense_total": 4718592, - "linear_nnz": 561536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2637888, - "linear_dense_total": 4718592, - "linear_nnz": 3613632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2681408, - "linear_dense_total": 4718592, - "linear_nnz": 3788800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1248448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2625472, - "linear_dense_total": 4718592, - "linear_nnz": 3873920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1182592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558784, - "linear_dense_total": 4718592, - "linear_nnz": 3741376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1016896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2130624, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 915648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523904, - "linear_dense_total": 4718592, - "linear_nnz": 2439552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 820288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827456, - "linear_dense_total": 4718592, - "linear_nnz": 1647744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305408, - "linear_dense_total": 4718592, - "linear_nnz": 819584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30052224, - "linear_sparsity": 64.61724175347221, - "linear_total": 84934656, - "nnz": 53991210, - "total": 108893186, - "total_sparsity": 50.418192374314394 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 87.861684752796 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.255816642761232, - "eval_elapsed_time": 36.84984774328768 - }, - "speedup": 1.319204091160467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 633664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2102592, - "linear_dense_total": 4718592, - "linear_nnz": 2736256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2319616, - "linear_dense_total": 4718592, - "linear_nnz": 2981952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 396032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297856, - "linear_dense_total": 4718592, - "linear_nnz": 693888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297792, - "linear_dense_total": 4718592, - "linear_nnz": 560000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2636544, - "linear_dense_total": 4718592, - "linear_nnz": 3611840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2680128, - "linear_dense_total": 4718592, - "linear_nnz": 3788096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1247936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2623936, - "linear_dense_total": 4718592, - "linear_nnz": 3871872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1181888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558208, - "linear_dense_total": 4718592, - "linear_nnz": 3740096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1015040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2132480, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 913792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523328, - "linear_dense_total": 4718592, - "linear_nnz": 2437120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 818752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827264, - "linear_dense_total": 4718592, - "linear_nnz": 1646016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304640, - "linear_dense_total": 4718592, - "linear_nnz": 819008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30033664, - "linear_sparsity": 64.6390938464506, - "linear_total": 84934656, - "nnz": 53972650, - "total": 108893186, - "total_sparsity": 50.4352365996528 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l20-dl1--2021-01-24--15-46-47/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.05392620624409, - "f1": 86.84949475139184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.667898628234862, - "eval_elapsed_time": 32.10200677579269 - }, - "speedup": 1.5645594133095706, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 407936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1088064, - "linear_dense_total": 4718592, - "linear_nnz": 1496000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 569088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1378944, - "linear_dense_total": 4718592, - "linear_nnz": 1948032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 298112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181568, - "linear_dense_total": 4718592, - "linear_nnz": 479680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 185728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199488, - "linear_dense_total": 4718592, - "linear_nnz": 385216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 770560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1695552, - "linear_dense_total": 4718592, - "linear_nnz": 2466112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 902848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1680512, - "linear_dense_total": 4718592, - "linear_nnz": 2583360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1624640, - "linear_dense_total": 4718592, - "linear_nnz": 2537856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 749440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1534912, - "linear_dense_total": 4718592, - "linear_nnz": 2284352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 684480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1190976, - "linear_dense_total": 4718592, - "linear_nnz": 1875456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 672320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815872, - "linear_dense_total": 4718592, - "linear_nnz": 1488192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 570176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 399104, - "linear_dense_total": 4718592, - "linear_nnz": 969280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167744, - "linear_dense_total": 4718592, - "linear_nnz": 513408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19026944, - "linear_sparsity": 77.59813850308642, - "linear_total": 84934656, - "nnz": 42955274, - "total": 108893186, - "total_sparsity": 60.55283569350244 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l40-dl1--2021-01-24--15-47-15/checkpoint-110660": { - "eval_metrics": { - "exact_match": 76.87795648060549, - "f1": 85.16652519097626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.238733966827393, - "eval_elapsed_time": 26.43846725206822 - }, - "speedup": 2.0060775865978457, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 330432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 520000, - "linear_dense_total": 4718592, - "linear_nnz": 850432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 468224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 724864, - "linear_dense_total": 4718592, - "linear_nnz": 1193088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 206912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137088, - "linear_dense_total": 4718592, - "linear_nnz": 344000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 127744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 128064, - "linear_dense_total": 4718592, - "linear_nnz": 255808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 511104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 975680, - "linear_dense_total": 4718592, - "linear_nnz": 1486784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 688192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 908032, - "linear_dense_total": 4718592, - "linear_nnz": 1596224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 551360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863296, - "linear_dense_total": 4718592, - "linear_nnz": 1414656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 466304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 787328, - "linear_dense_total": 4718592, - "linear_nnz": 1253632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 451840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 695488, - "linear_dense_total": 4718592, - "linear_nnz": 1147328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 475840, - "linear_dense_total": 4718592, - "linear_nnz": 973760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 302528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 217600, - "linear_dense_total": 4718592, - "linear_nnz": 520128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 255168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 91264, - "linear_dense_total": 4718592, - "linear_nnz": 346432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11382272, - "linear_sparsity": 86.59878954475309, - "linear_total": 84934656, - "nnz": 35298682, - "total": 108893186, - "total_sparsity": 67.5841222976064 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.35425478567389 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.427229469299316, - "eval_elapsed_time": 30.796412555966526 - }, - "speedup": 1.6474160145973682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 880896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2828544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 849152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2819840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1222144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3367424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1352448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3521280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1524992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3693824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1511680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3554560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1336320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2935296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2452992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1134080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1836032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30067968, - "linear_sparsity": 64.59870515046296, - "linear_total": 84934656, - "nnz": 53990689, - "total": 108893186, - "total_sparsity": 50.41867082482094 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": { - "eval_metrics": { - "exact_match": 81.11636707663197, - "f1": 88.26635621180897 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.067204750061034, - "eval_elapsed_time": 30.552880198229104 - }, - "speedup": 1.6731282972319816, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 878336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2825984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 852736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2823424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 385792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1210624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1168384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3366400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1360384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3529216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1525248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3694080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1519360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1345792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2944768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1175296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2450176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1126912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1828864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1023488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30071296, - "linear_sparsity": 64.5947868441358, - "linear_total": 84934656, - "nnz": 53994017, - "total": 108893186, - "total_sparsity": 50.41561461889819 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": { - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.29241912882233 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.73566310119629, - "eval_elapsed_time": 37.101448519621044 - }, - "speedup": 1.2979160032189903, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 466432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896491, - "linear_dense_total": 4718592, - "linear_nnz": 3362923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933262, - "linear_dense_total": 4718592, - "linear_nnz": 3511822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 353792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168742, - "linear_dense_total": 4718592, - "linear_nnz": 1522534, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 204032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632549, - "linear_dense_total": 4718592, - "linear_nnz": 836581, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 636672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005770, - "linear_dense_total": 4718592, - "linear_nnz": 3642442, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 857344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985991, - "linear_dense_total": 4718592, - "linear_nnz": 3843335, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 829184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2939127, - "linear_dense_total": 4718592, - "linear_nnz": 3768311, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 754432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915969, - "linear_dense_total": 4718592, - "linear_nnz": 3670401, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787598, - "linear_dense_total": 4718592, - "linear_nnz": 3555086, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 752640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497594, - "linear_dense_total": 4718592, - "linear_nnz": 3250234, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115777, - "linear_dense_total": 4718592, - "linear_nnz": 2669249, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490216, - "linear_dense_total": 4718592, - "linear_nnz": 1903656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35536574, - "linear_sparsity": 58.160101337197375, - "linear_total": 84934656, - "nnz": 59478503, - "total": 108893186, - "total_sparsity": 45.379040521415185 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.77578051087986, - "f1": 88.22778160568927 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.759838722229006, - "eval_elapsed_time": 37.11843426898122 - }, - "speedup": 1.2968616317313288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896466, - "linear_dense_total": 4718592, - "linear_nnz": 3365714, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 574976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933134, - "linear_dense_total": 4718592, - "linear_nnz": 3508110, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 355584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168698, - "linear_dense_total": 4718592, - "linear_nnz": 1524282, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 201472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632483, - "linear_dense_total": 4718592, - "linear_nnz": 833955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 634624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005666, - "linear_dense_total": 4718592, - "linear_nnz": 3640290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985914, - "linear_dense_total": 4718592, - "linear_nnz": 3837370, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 830720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2938982, - "linear_dense_total": 4718592, - "linear_nnz": 3769702, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 756480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915873, - "linear_dense_total": 4718592, - "linear_nnz": 3672353, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 768256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787463, - "linear_dense_total": 4718592, - "linear_nnz": 3555719, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 753408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497485, - "linear_dense_total": 4718592, - "linear_nnz": 3250893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115685, - "linear_dense_total": 4718592, - "linear_nnz": 2666597, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490132, - "linear_dense_total": 4718592, - "linear_nnz": 1903316, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35528301, - "linear_sparsity": 58.16984176635742, - "linear_total": 84934656, - "nnz": 59470230, - "total": 108893186, - "total_sparsity": 45.38663787466004 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.92431409649953, - "f1": 87.57193515884181 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.83310959625244, - "eval_elapsed_time": 35.16166925104335 - }, - "speedup": 1.3866360448121684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 341248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332385, - "linear_dense_total": 4718592, - "linear_nnz": 2673633, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387588, - "linear_dense_total": 4718592, - "linear_nnz": 2850180, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 227328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646176, - "linear_dense_total": 4718592, - "linear_nnz": 873504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 128000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326046, - "linear_dense_total": 4718592, - "linear_nnz": 454046, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 412672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458384, - "linear_dense_total": 4718592, - "linear_nnz": 2871056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 692736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421331, - "linear_dense_total": 4718592, - "linear_nnz": 3114067, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 505088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348506, - "linear_dense_total": 4718592, - "linear_nnz": 2853594, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322654, - "linear_dense_total": 4718592, - "linear_nnz": 2871518, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 469504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138640, - "linear_dense_total": 4718592, - "linear_nnz": 2608144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 552448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830183, - "linear_dense_total": 4718592, - "linear_nnz": 2382631, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 316672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440503, - "linear_dense_total": 4718592, - "linear_nnz": 1757175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859209, - "linear_dense_total": 4718592, - "linear_nnz": 1151305, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26460853, - "linear_sparsity": 68.84563469592435, - "linear_total": 84934656, - "nnz": 50398933, - "total": 108893186, - "total_sparsity": 53.71709208691902 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.5280353923367 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.96729845428467, - "eval_elapsed_time": 35.3477450478822 - }, - "speedup": 1.3799828778048573, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332233, - "linear_dense_total": 4718592, - "linear_nnz": 2668105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387496, - "linear_dense_total": 4718592, - "linear_nnz": 2839080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 224768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646159, - "linear_dense_total": 4718592, - "linear_nnz": 870927, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 124672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325999, - "linear_dense_total": 4718592, - "linear_nnz": 450671, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 408576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458332, - "linear_dense_total": 4718592, - "linear_nnz": 2866908, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 682496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421186, - "linear_dense_total": 4718592, - "linear_nnz": 3103682, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348406, - "linear_dense_total": 4718592, - "linear_nnz": 2853238, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 558336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322448, - "linear_dense_total": 4718592, - "linear_nnz": 2880784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 475904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138474, - "linear_dense_total": 4718592, - "linear_nnz": 2614378, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830088, - "linear_dense_total": 4718592, - "linear_nnz": 2372808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 312576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440402, - "linear_dense_total": 4718592, - "linear_nnz": 1752978, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 288000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859129, - "linear_dense_total": 4718592, - "linear_nnz": 1147129, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26420688, - "linear_sparsity": 68.89292399088542, - "linear_total": 84934656, - "nnz": 50358753, - "total": 108893186, - "total_sparsity": 53.75399063078199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.86471144749291, - "f1": 86.87223379259328 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.916674156188964, - "eval_elapsed_time": 34.25446852017194 - }, - "speedup": 1.4338470191904102, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 211712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718621, - "linear_dense_total": 4718592, - "linear_nnz": 1930333, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 345600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1771278, - "linear_dense_total": 4718592, - "linear_nnz": 2116878, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 157696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325955, - "linear_dense_total": 4718592, - "linear_nnz": 483651, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 90368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164774, - "linear_dense_total": 4718592, - "linear_nnz": 255142, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 278016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1816807, - "linear_dense_total": 4718592, - "linear_nnz": 2094823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 493312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1772769, - "linear_dense_total": 4718592, - "linear_nnz": 2266081, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1682765, - "linear_dense_total": 4718592, - "linear_nnz": 1986893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635131, - "linear_dense_total": 4718592, - "linear_nnz": 1992507, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1457711, - "linear_dense_total": 4718592, - "linear_nnz": 1736239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 355072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1174807, - "linear_dense_total": 4718592, - "linear_nnz": 1529879, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 183552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867865, - "linear_dense_total": 4718592, - "linear_nnz": 1051417, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 439457, - "linear_dense_total": 4718592, - "linear_nnz": 636321, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18080164, - "linear_sparsity": 78.7128542676384, - "linear_total": 84934656, - "nnz": 42014844, - "total": 108893186, - "total_sparsity": 61.41646181607727 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": { - "eval_metrics": { - "exact_match": 81.3434247871334, - "f1": 88.502960365548 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.458772911071776, - "eval_elapsed_time": 41.833797600120306 - }, - "speedup": 1.120016464456589, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 356016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2711219, - "linear_dense_total": 4718592, - "linear_nnz": 3067235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 506400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2753947, - "linear_dense_total": 4718592, - "linear_nnz": 3260347, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 305952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 956610, - "linear_dense_total": 4718592, - "linear_nnz": 1262562, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 532866, - "linear_dense_total": 4718592, - "linear_nnz": 705730, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 658880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2827796, - "linear_dense_total": 4718592, - "linear_nnz": 3486676, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 782176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2810214, - "linear_dense_total": 4718592, - "linear_nnz": 3592390, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 874272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2770460, - "linear_dense_total": 4718592, - "linear_nnz": 3644732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 772928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2750302, - "linear_dense_total": 4718592, - "linear_nnz": 3523230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2610331, - "linear_dense_total": 4718592, - "linear_nnz": 3378315, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2295378, - "linear_dense_total": 4718592, - "linear_nnz": 2983346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 596368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1868727, - "linear_dense_total": 4718592, - "linear_nnz": 2465095, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 404448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245775, - "linear_dense_total": 4718592, - "linear_nnz": 1650223, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 33019881, - "linear_sparsity": 61.12319451791268, - "linear_total": 84934656, - "nnz": 56967217, - "total": 108893186, - "total_sparsity": 47.6852325727709 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 88.09731480353894 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 31.47156787109375, - "eval_elapsed_time": 38.88521202793345 - }, - "speedup": 1.2263257160702048, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 233808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2108257, - "linear_dense_total": 4718592, - "linear_nnz": 2342065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 370912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2165809, - "linear_dense_total": 4718592, - "linear_nnz": 2536721, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 189856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496337, - "linear_dense_total": 4718592, - "linear_nnz": 686193, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 106192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 273404, - "linear_dense_total": 4718592, - "linear_nnz": 379596, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 368864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2238488, - "linear_dense_total": 4718592, - "linear_nnz": 2607352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 528528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2212294, - "linear_dense_total": 4718592, - "linear_nnz": 2740822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 515168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2147598, - "linear_dense_total": 4718592, - "linear_nnz": 2662766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 456576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2125672, - "linear_dense_total": 4718592, - "linear_nnz": 2582248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 426512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1957790, - "linear_dense_total": 4718592, - "linear_nnz": 2384302, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 424416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1621523, - "linear_dense_total": 4718592, - "linear_nnz": 2045939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 311248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1220304, - "linear_dense_total": 4718592, - "linear_nnz": 1531552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 249120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 687520, - "linear_dense_total": 4718592, - "linear_nnz": 936640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23436196, - "linear_sparsity": 72.40679234634212, - "linear_total": 84934656, - "nnz": 47377613, - "total": 108893186, - "total_sparsity": 56.49166422589565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.22039562207584 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.016168815612794, - "eval_elapsed_time": 36.33264479693025 - }, - "speedup": 1.3300995472773969, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 145232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1501972, - "linear_dense_total": 4718592, - "linear_nnz": 1647204, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 280192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1562394, - "linear_dense_total": 4718592, - "linear_nnz": 1842586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 126288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 233713, - "linear_dense_total": 4718592, - "linear_nnz": 360001, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 73824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 141408, - "linear_dense_total": 4718592, - "linear_nnz": 215232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 234064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607786, - "linear_dense_total": 4718592, - "linear_nnz": 1841850, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 386752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1573980, - "linear_dense_total": 4718592, - "linear_nnz": 1960732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 281632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495134, - "linear_dense_total": 4718592, - "linear_nnz": 1776766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 288320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1455910, - "linear_dense_total": 4718592, - "linear_nnz": 1744230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 240864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278042, - "linear_dense_total": 4718592, - "linear_nnz": 1518906, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 275424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000907, - "linear_dense_total": 4718592, - "linear_nnz": 1276331, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 170816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 708174, - "linear_dense_total": 4718592, - "linear_nnz": 878990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 165920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330996, - "linear_dense_total": 4718592, - "linear_nnz": 496916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15559744, - "linear_sparsity": 81.68033552758487, - "linear_total": 84934656, - "nnz": 39496838, - "total": 108893186, - "total_sparsity": 63.728825052469304 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.12961210974456, - "f1": 87.04337592394437 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.089330375671388, - "eval_elapsed_time": 36.40407280996442 - }, - "speedup": 1.3267542603060118, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492013, - "linear_dense_total": 4718592, - "linear_nnz": 1634237, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552665, - "linear_dense_total": 4718592, - "linear_nnz": 1828361, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 124096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231707, - "linear_dense_total": 4718592, - "linear_nnz": 355803, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140436, - "linear_dense_total": 4718592, - "linear_nnz": 213044, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597983, - "linear_dense_total": 4718592, - "linear_nnz": 1825967, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563950, - "linear_dense_total": 4718592, - "linear_nnz": 1943566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 275824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485631, - "linear_dense_total": 4718592, - "linear_nnz": 1761455, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446554, - "linear_dense_total": 4718592, - "linear_nnz": 1729290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269099, - "linear_dense_total": 4718592, - "linear_nnz": 1504955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993705, - "linear_dense_total": 4718592, - "linear_nnz": 1263225, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702880, - "linear_dense_total": 4718592, - "linear_nnz": 870496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328143, - "linear_dense_total": 4718592, - "linear_nnz": 489695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15420094, - "linear_sparsity": 81.84475604398752, - "linear_total": 84934656, - "nnz": 39357122, - "total": 108893186, - "total_sparsity": 63.85713060135829 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.09176915799432, - "f1": 86.93076968810146 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.182387649536132, - "eval_elapsed_time": 36.50873678829521 - }, - "speedup": 1.3225234846739682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491817, - "linear_dense_total": 4718592, - "linear_nnz": 1634041, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552458, - "linear_dense_total": 4718592, - "linear_nnz": 1828346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 123920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231690, - "linear_dense_total": 4718592, - "linear_nnz": 355610, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140404, - "linear_dense_total": 4718592, - "linear_nnz": 212916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597816, - "linear_dense_total": 4718592, - "linear_nnz": 1825560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563794, - "linear_dense_total": 4718592, - "linear_nnz": 1942802, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 276192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485468, - "linear_dense_total": 4718592, - "linear_nnz": 1761660, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446397, - "linear_dense_total": 4718592, - "linear_nnz": 1728493, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1268987, - "linear_dense_total": 4718592, - "linear_nnz": 1504843, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993538, - "linear_dense_total": 4718592, - "linear_nnz": 1262994, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702743, - "linear_dense_total": 4718592, - "linear_nnz": 870263, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328079, - "linear_dense_total": 4718592, - "linear_nnz": 489503, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15417031, - "linear_sparsity": 81.84836234575437, - "linear_total": 84934656, - "nnz": 39354055, - "total": 108893186, - "total_sparsity": 63.859947122862216 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": { - "eval_metrics": { - "exact_match": 81.00283822138127, - "f1": 88.2671108560581 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.23066467285156, - "eval_elapsed_time": 39.6229472043924 - }, - "speedup": 1.1974432856757005, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 405824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826999, - "linear_dense_total": 4718592, - "linear_nnz": 3232823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868775, - "linear_dense_total": 4718592, - "linear_nnz": 3412647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 325760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081633, - "linear_dense_total": 4718592, - "linear_nnz": 1407393, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591690, - "linear_dense_total": 4718592, - "linear_nnz": 765706, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 613248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943603, - "linear_dense_total": 4718592, - "linear_nnz": 3556851, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 791424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916774, - "linear_dense_total": 4718592, - "linear_nnz": 3708198, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 819072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876887, - "linear_dense_total": 4718592, - "linear_nnz": 3695959, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 788928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855670, - "linear_dense_total": 4718592, - "linear_nnz": 3644598, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724886, - "linear_dense_total": 4718592, - "linear_nnz": 3486486, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427996, - "linear_dense_total": 4718592, - "linear_nnz": 3114460, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 602496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013542, - "linear_dense_total": 4718592, - "linear_nnz": 2616038, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394109, - "linear_dense_total": 4718592, - "linear_nnz": 1775741, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34416900, - "linear_sparsity": 59.47837829589844, - "linear_total": 84934656, - "nnz": 58360680, - "total": 108893186, - "total_sparsity": 46.405572153982156 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": { - "eval_metrics": { - "exact_match": 81.01229895931883, - "f1": 88.16022239737082 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.31462289428711, - "eval_elapsed_time": 39.686994375661016 - }, - "speedup": 1.1943321489972945, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 404736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826896, - "linear_dense_total": 4718592, - "linear_nnz": 3231632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868676, - "linear_dense_total": 4718592, - "linear_nnz": 3411716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 322624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081551, - "linear_dense_total": 4718592, - "linear_nnz": 1404175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591605, - "linear_dense_total": 4718592, - "linear_nnz": 763893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 614464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943501, - "linear_dense_total": 4718592, - "linear_nnz": 3557965, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 790144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916630, - "linear_dense_total": 4718592, - "linear_nnz": 3706774, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 816832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876748, - "linear_dense_total": 4718592, - "linear_nnz": 3693580, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 785920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855585, - "linear_dense_total": 4718592, - "linear_nnz": 3641505, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724738, - "linear_dense_total": 4718592, - "linear_nnz": 3484162, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427854, - "linear_dense_total": 4718592, - "linear_nnz": 3114894, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 603648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013418, - "linear_dense_total": 4718592, - "linear_nnz": 2617066, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 379328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394031, - "linear_dense_total": 4718592, - "linear_nnz": 1773359, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34400721, - "linear_sparsity": 59.49742705733687, - "linear_total": 84934656, - "nnz": 58344499, - "total": 108893186, - "total_sparsity": 46.42043166961797 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.15137180700094, - "f1": 87.62280270760408 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.9650231628418, - "eval_elapsed_time": 36.364678455051035 - }, - "speedup": 1.3324482010041157, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 278464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2254373, - "linear_dense_total": 4718592, - "linear_nnz": 2532837, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 411200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2313203, - "linear_dense_total": 4718592, - "linear_nnz": 2724403, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 587562, - "linear_dense_total": 4718592, - "linear_nnz": 795434, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 115648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304918, - "linear_dense_total": 4718592, - "linear_nnz": 420566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 388544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2383637, - "linear_dense_total": 4718592, - "linear_nnz": 2772181, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 616064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2346825, - "linear_dense_total": 4718592, - "linear_nnz": 2962889, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 475392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281407, - "linear_dense_total": 4718592, - "linear_nnz": 2756799, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 485760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2255524, - "linear_dense_total": 4718592, - "linear_nnz": 2741284, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 436416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2089830, - "linear_dense_total": 4718592, - "linear_nnz": 2526246, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1759353, - "linear_dense_total": 4718592, - "linear_nnz": 2233017, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360596, - "linear_dense_total": 4718592, - "linear_nnz": 1652692, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 260864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 795671, - "linear_dense_total": 4718592, - "linear_nnz": 1056535, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25174883, - "linear_sparsity": 70.35970452391072, - "linear_total": 84934656, - "nnz": 49113499, - "total": 108893186, - "total_sparsity": 54.89754611459343 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": { - "eval_metrics": { - "exact_match": 78.59981078524125, - "f1": 86.70965342219107 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.427432876586913, - "eval_elapsed_time": 34.77788851317018 - }, - "speedup": 1.407145655192423, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 185152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640271, - "linear_dense_total": 4718592, - "linear_nnz": 1825423, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 309376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701856, - "linear_dense_total": 4718592, - "linear_nnz": 2011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283313, - "linear_dense_total": 4718592, - "linear_nnz": 423537, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 154892, - "linear_dense_total": 4718592, - "linear_nnz": 237196, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1747153, - "linear_dense_total": 4718592, - "linear_nnz": 2013521, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 452288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1699193, - "linear_dense_total": 4718592, - "linear_nnz": 2151481, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 315584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1622345, - "linear_dense_total": 4718592, - "linear_nnz": 1937929, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 324160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1582184, - "linear_dense_total": 4718592, - "linear_nnz": 1906344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 264448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1396319, - "linear_dense_total": 4718592, - "linear_nnz": 1660767, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 312704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106218, - "linear_dense_total": 4718592, - "linear_nnz": 1418922, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 176128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 797060, - "linear_dense_total": 4718592, - "linear_nnz": 973188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 178368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 396240, - "linear_dense_total": 4718592, - "linear_nnz": 574608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17134148, - "linear_sparsity": 79.82667051715615, - "linear_total": 84934656, - "nnz": 41069735, - "total": 108893186, - "total_sparsity": 62.28438480989986 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": { - "eval_metrics": { - "exact_match": 78.78902554399244, - "f1": 86.80367154149816 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.48367044067383, - "eval_elapsed_time": 34.82450146274641 - }, - "speedup": 1.404266329298368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 181120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630256, - "linear_dense_total": 4718592, - "linear_nnz": 1811376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 307392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692246, - "linear_dense_total": 4718592, - "linear_nnz": 1999638, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 136448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281138, - "linear_dense_total": 4718592, - "linear_nnz": 417586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153935, - "linear_dense_total": 4718592, - "linear_nnz": 236239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737446, - "linear_dense_total": 4718592, - "linear_nnz": 2004326, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689801, - "linear_dense_total": 4718592, - "linear_nnz": 2132105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 309632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1613097, - "linear_dense_total": 4718592, - "linear_nnz": 1922729, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 313664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572856, - "linear_dense_total": 4718592, - "linear_nnz": 1886520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 259072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387577, - "linear_dense_total": 4718592, - "linear_nnz": 1646649, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 306112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098827, - "linear_dense_total": 4718592, - "linear_nnz": 1404939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 173184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791446, - "linear_dense_total": 4718592, - "linear_nnz": 964630, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 172928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393190, - "linear_dense_total": 4718592, - "linear_nnz": 566118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16992855, - "linear_sparsity": 79.99302546183267, - "linear_total": 84934656, - "nnz": 40928357, - "total": 108893186, - "total_sparsity": 62.414216625088 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.6092715231788, - "f1": 86.70267601348202 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.478721130371095, - "eval_elapsed_time": 34.80613293591887 - }, - "speedup": 1.4045192577290035, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 180736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630123, - "linear_dense_total": 4718592, - "linear_nnz": 1810859, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 305920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692103, - "linear_dense_total": 4718592, - "linear_nnz": 1998023, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 135616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281100, - "linear_dense_total": 4718592, - "linear_nnz": 416716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 81536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153912, - "linear_dense_total": 4718592, - "linear_nnz": 235448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 263936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737263, - "linear_dense_total": 4718592, - "linear_nnz": 2001199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689622, - "linear_dense_total": 4718592, - "linear_nnz": 2132118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 306304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1612927, - "linear_dense_total": 4718592, - "linear_nnz": 1919231, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 312128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572769, - "linear_dense_total": 4718592, - "linear_nnz": 1884897, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387454, - "linear_dense_total": 4718592, - "linear_nnz": 1645758, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 305856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098709, - "linear_dense_total": 4718592, - "linear_nnz": 1404565, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 172480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791310, - "linear_dense_total": 4718592, - "linear_nnz": 963790, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 170944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393127, - "linear_dense_total": 4718592, - "linear_nnz": 564071, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16976675, - "linear_sparsity": 80.01207540064682, - "linear_total": 84934656, - "nnz": 40912185, - "total": 108893186, - "total_sparsity": 62.42906787574385 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.34112193061533 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 30.13610975646973, - "eval_elapsed_time": 37.54532916797325 - }, - "speedup": 1.2806693802635063, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 517888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4068608, - "linear_dense_total": 4718592, - "linear_nnz": 4586496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 641536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4202752, - "linear_dense_total": 4718592, - "linear_nnz": 4844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 415488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090304, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 254720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 947200, - "linear_dense_total": 4718592, - "linear_nnz": 1201920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 841472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4313856, - "linear_dense_total": 4718592, - "linear_nnz": 5155328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1072896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4336128, - "linear_dense_total": 4718592, - "linear_nnz": 5409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1068800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4317184, - "linear_dense_total": 4718592, - "linear_nnz": 5385984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 961792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5272832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 986880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4141568, - "linear_dense_total": 4718592, - "linear_nnz": 5128448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 905472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3820032, - "linear_dense_total": 4718592, - "linear_nnz": 4725504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 756224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3085568, - "linear_dense_total": 4718592, - "linear_nnz": 3841792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 463360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1416448, - "linear_dense_total": 4718592, - "linear_nnz": 1879808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48937216, - "linear_sparsity": 42.38251109182099, - "linear_total": 84934656, - "nnz": 72878482, - "total": 108893186, - "total_sparsity": 33.07342297799975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": { - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.51569063636161 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.3544778213501, - "eval_elapsed_time": 33.69302155217156 - }, - "speedup": 1.4644339860190774, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3448576, - "linear_dense_total": 4718592, - "linear_nnz": 3867392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696384, - "linear_dense_total": 4718592, - "linear_nnz": 4250112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 291584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 654592, - "linear_dense_total": 4718592, - "linear_nnz": 946176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 168960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 562432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3956992, - "linear_dense_total": 4718592, - "linear_nnz": 4519424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3982336, - "linear_dense_total": 4718592, - "linear_nnz": 4809728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3912960, - "linear_dense_total": 4718592, - "linear_nnz": 4702976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 701696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3899648, - "linear_dense_total": 4718592, - "linear_nnz": 4601344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 667392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3576064, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 700416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2968832, - "linear_dense_total": 4718592, - "linear_nnz": 3669248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 437504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966592, - "linear_dense_total": 4718592, - "linear_nnz": 2404096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 361472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 693504, - "linear_dense_total": 4718592, - "linear_nnz": 1054976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39853312, - "linear_sparsity": 53.077678915895056, - "linear_total": 84934656, - "nnz": 63788226, - "total": 108893186, - "total_sparsity": 41.42128782970864 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": { - "eval_metrics": { - "exact_match": 79.55534531693472, - "f1": 87.439750439335 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.32847610473633, - "eval_elapsed_time": 33.60846929671243 - }, - "speedup": 1.4658802450943298, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 416256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3444992, - "linear_dense_total": 4718592, - "linear_nnz": 3861248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 541952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3708416, - "linear_dense_total": 4718592, - "linear_nnz": 4250368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 285184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 647936, - "linear_dense_total": 4718592, - "linear_nnz": 933120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 611328, - "linear_dense_total": 4718592, - "linear_nnz": 785408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 555520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3977216, - "linear_dense_total": 4718592, - "linear_nnz": 4532736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 802816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4019968, - "linear_dense_total": 4718592, - "linear_nnz": 4822784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939840, - "linear_dense_total": 4718592, - "linear_nnz": 4714240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 686592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3897600, - "linear_dense_total": 4718592, - "linear_nnz": 4584192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 656384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3568640, - "linear_dense_total": 4718592, - "linear_nnz": 4225024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 676864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2957312, - "linear_dense_total": 4718592, - "linear_nnz": 3634176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 432640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1931264, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1030400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39737600, - "linear_sparsity": 53.213915412808646, - "linear_total": 84934656, - "nnz": 63672482, - "total": 108893186, - "total_sparsity": 41.52757914531035 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.91485335856197, - "f1": 87.42973403288855 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.343981628417968, - "eval_elapsed_time": 33.70636031124741 - }, - "speedup": 1.4650174582470206, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 417024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3447808, - "linear_dense_total": 4718592, - "linear_nnz": 3864832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3703296, - "linear_dense_total": 4718592, - "linear_nnz": 4246016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 273408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 647424, - "linear_dense_total": 4718592, - "linear_nnz": 920832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 166400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 608512, - "linear_dense_total": 4718592, - "linear_nnz": 774912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 555776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3981824, - "linear_dense_total": 4718592, - "linear_nnz": 4537600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 810240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4014336, - "linear_dense_total": 4718592, - "linear_nnz": 4824576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 764160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3940608, - "linear_dense_total": 4718592, - "linear_nnz": 4704768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 685824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3904256, - "linear_dense_total": 4718592, - "linear_nnz": 4590080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 647680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3571456, - "linear_dense_total": 4718592, - "linear_nnz": 4219136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 684288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2956288, - "linear_dense_total": 4718592, - "linear_nnz": 3640576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 427264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1932800, - "linear_dense_total": 4718592, - "linear_nnz": 2360064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 350976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 682496, - "linear_dense_total": 4718592, - "linear_nnz": 1033472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39716864, - "linear_sparsity": 53.238329475308646, - "linear_total": 84934656, - "nnz": 63651698, - "total": 108893186, - "total_sparsity": 41.546665739029805 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.11731315042573, - "f1": 86.14927876930865 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.35162329864502, - "eval_elapsed_time": 30.60480569722131 - }, - "speedup": 1.6527498971607057, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 331008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354688, - "linear_dense_total": 4718592, - "linear_nnz": 2685696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 432384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826240, - "linear_dense_total": 4718592, - "linear_nnz": 3258624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 203008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 415744, - "linear_dense_total": 4718592, - "linear_nnz": 618752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423168, - "linear_dense_total": 4718592, - "linear_nnz": 535296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3302144, - "linear_dense_total": 4718592, - "linear_nnz": 3726080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 669440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3248128, - "linear_dense_total": 4718592, - "linear_nnz": 3917568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 453632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3193600, - "linear_dense_total": 4718592, - "linear_nnz": 3647232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 473856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3119616, - "linear_dense_total": 4718592, - "linear_nnz": 3593472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 445952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2493696, - "linear_dense_total": 4718592, - "linear_nnz": 2939648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 490752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1891072, - "linear_dense_total": 4718592, - "linear_nnz": 2381824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 275712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1108736, - "linear_dense_total": 4718592, - "linear_nnz": 1384448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348928, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29295872, - "linear_sparsity": 65.5077522183642, - "linear_total": 84934656, - "nnz": 53223538, - "total": 108893186, - "total_sparsity": 51.12316945157615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": { - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.58172107792693 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.2993692779541, - "eval_elapsed_time": 41.87211530236527 - }, - "speedup": 1.1252216532791355, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 428592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3980096, - "linear_dense_total": 4718592, - "linear_nnz": 4408688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 545744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4015584, - "linear_dense_total": 4718592, - "linear_nnz": 4561328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 329968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2092032, - "linear_dense_total": 4718592, - "linear_nnz": 2422000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 190816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1335104, - "linear_dense_total": 4718592, - "linear_nnz": 1525920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 729664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4061440, - "linear_dense_total": 4718592, - "linear_nnz": 4791104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4062640, - "linear_dense_total": 4718592, - "linear_nnz": 4914112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 960992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4047744, - "linear_dense_total": 4718592, - "linear_nnz": 5008736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 902768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006096, - "linear_dense_total": 4718592, - "linear_nnz": 4908864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 861120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3920672, - "linear_dense_total": 4718592, - "linear_nnz": 4781792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 759664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3732848, - "linear_dense_total": 4718592, - "linear_nnz": 4492512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 670096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3391392, - "linear_dense_total": 4718592, - "linear_nnz": 4061488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 444064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2661776, - "linear_dense_total": 4718592, - "linear_nnz": 3105840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48982384, - "linear_sparsity": 42.329331386236504, - "linear_total": 84934656, - "nnz": 72930262, - "total": 108893186, - "total_sparsity": 33.025871793300276 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": { - "eval_metrics": { - "exact_match": 80.52980132450331, - "f1": 88.02284574429551 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.3459995803833, - "eval_elapsed_time": 40.03914254019037 - }, - "speedup": 1.1931736074335828, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 261808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3592944, - "linear_dense_total": 4718592, - "linear_nnz": 3854752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 407856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3665376, - "linear_dense_total": 4718592, - "linear_nnz": 4073232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 212544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1095184, - "linear_dense_total": 4718592, - "linear_nnz": 1307728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 122704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702496, - "linear_dense_total": 4718592, - "linear_nnz": 825200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 470352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3747664, - "linear_dense_total": 4718592, - "linear_nnz": 4218016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 586320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3731872, - "linear_dense_total": 4718592, - "linear_nnz": 4318192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 598112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696160, - "linear_dense_total": 4718592, - "linear_nnz": 4294272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3650592, - "linear_dense_total": 4718592, - "linear_nnz": 4191568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 518320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3500640, - "linear_dense_total": 4718592, - "linear_nnz": 4018960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 494608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3197872, - "linear_dense_total": 4718592, - "linear_nnz": 3692480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2682864, - "linear_dense_total": 4718592, - "linear_nnz": 3064736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 281888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600800, - "linear_dense_total": 4718592, - "linear_nnz": 1882688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39741824, - "linear_sparsity": 53.208942177854944, - "linear_total": 84934656, - "nnz": 63685078, - "total": 108893186, - "total_sparsity": 41.51601184669167 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.09460737937559, - "f1": 87.80889686617203 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.19205239105224, - "eval_elapsed_time": 39.82947535999119 - }, - "speedup": 1.1988795413397866, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 258016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3584960, - "linear_dense_total": 4718592, - "linear_nnz": 3842976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 404784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3659360, - "linear_dense_total": 4718592, - "linear_nnz": 4064144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 209136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1083920, - "linear_dense_total": 4718592, - "linear_nnz": 1293056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 120976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697408, - "linear_dense_total": 4718592, - "linear_nnz": 818384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3741328, - "linear_dense_total": 4718592, - "linear_nnz": 4202080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 577184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3724032, - "linear_dense_total": 4718592, - "linear_nnz": 4301216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 587792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3689648, - "linear_dense_total": 4718592, - "linear_nnz": 4277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 530480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3641984, - "linear_dense_total": 4718592, - "linear_nnz": 4172464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 508336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3491408, - "linear_dense_total": 4718592, - "linear_nnz": 3999744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 486304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3187056, - "linear_dense_total": 4718592, - "linear_nnz": 3673360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 374032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2669344, - "linear_dense_total": 4718592, - "linear_nnz": 3043376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 276992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1586976, - "linear_dense_total": 4718592, - "linear_nnz": 1863968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39552208, - "linear_sparsity": 53.432191448447156, - "linear_total": 84934656, - "nnz": 63495382, - "total": 108893186, - "total_sparsity": 41.69021558428826 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": { - "eval_metrics": { - "exact_match": 79.64049195837275, - "f1": 87.31499809166372 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.556625274658202, - "eval_elapsed_time": 36.13367621740326 - }, - "speedup": 1.3515039902008532, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 172416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2978704, - "linear_dense_total": 4718592, - "linear_nnz": 3151120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 308192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3103168, - "linear_dense_total": 4718592, - "linear_nnz": 3411360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526736, - "linear_dense_total": 4718592, - "linear_nnz": 667120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 84608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377248, - "linear_dense_total": 4718592, - "linear_nnz": 461856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3205568, - "linear_dense_total": 4718592, - "linear_nnz": 3491136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 437904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3165264, - "linear_dense_total": 4718592, - "linear_nnz": 3603168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 321040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3089840, - "linear_dense_total": 4718592, - "linear_nnz": 3410880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 332784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3023632, - "linear_dense_total": 4718592, - "linear_nnz": 3356416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 288464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2784432, - "linear_dense_total": 4718592, - "linear_nnz": 3072896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 328464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354768, - "linear_dense_total": 4718592, - "linear_nnz": 2683232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 204832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1752368, - "linear_dense_total": 4718592, - "linear_nnz": 1957200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 189616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 715936, - "linear_dense_total": 4718592, - "linear_nnz": 905552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30171936, - "linear_sparsity": 64.47629575376158, - "linear_total": 84934656, - "nnz": 54109530, - "total": 108893186, - "total_sparsity": 50.30953543778212 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-110660": { - "eval_metrics": { - "exact_match": 79.13907284768212, - "f1": 86.97173787941202 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.558930541992186, - "eval_elapsed_time": 36.152482252102345 - }, - "speedup": 1.3513948972501988, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 169136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2961360, - "linear_dense_total": 4718592, - "linear_nnz": 3130496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 304464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3089024, - "linear_dense_total": 4718592, - "linear_nnz": 3393488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 137920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 522400, - "linear_dense_total": 4718592, - "linear_nnz": 660320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 374544, - "linear_dense_total": 4718592, - "linear_nnz": 457024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 279216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3191664, - "linear_dense_total": 4718592, - "linear_nnz": 3470880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 429728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3150736, - "linear_dense_total": 4718592, - "linear_nnz": 3580464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 314688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3076048, - "linear_dense_total": 4718592, - "linear_nnz": 3390736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 326416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3008016, - "linear_dense_total": 4718592, - "linear_nnz": 3334432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 281984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2766480, - "linear_dense_total": 4718592, - "linear_nnz": 3048464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 320352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2338640, - "linear_dense_total": 4718592, - "linear_nnz": 2658992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 200608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1736048, - "linear_dense_total": 4718592, - "linear_nnz": 1936656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 185008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 707152, - "linear_dense_total": 4718592, - "linear_nnz": 892160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29954112, - "linear_sparsity": 64.7327564380787, - "linear_total": 84934656, - "nnz": 53891686, - "total": 108893186, - "total_sparsity": 50.50958835936713 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": { - "eval_metrics": { - "exact_match": 81.10690633869442, - "f1": 88.3744311515211 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.22343955230713, - "eval_elapsed_time": 39.62965265568346 - }, - "speedup": 1.1977117757004876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004672, - "linear_dense_total": 4718592, - "linear_nnz": 4451008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4077632, - "linear_dense_total": 4718592, - "linear_nnz": 4674880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517184, - "linear_dense_total": 4718592, - "linear_nnz": 1879872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 218432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1064384, - "linear_dense_total": 4718592, - "linear_nnz": 1282816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 799296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155072, - "linear_dense_total": 4718592, - "linear_nnz": 4954368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 950208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165440, - "linear_dense_total": 4718592, - "linear_nnz": 5115648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1022400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152448, - "linear_dense_total": 4718592, - "linear_nnz": 5174848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 914368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4106624, - "linear_dense_total": 4718592, - "linear_nnz": 5020992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 918208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3961088, - "linear_dense_total": 4718592, - "linear_nnz": 4879296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 832704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3684992, - "linear_dense_total": 4718592, - "linear_nnz": 4517696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 715648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207040, - "linear_dense_total": 4718592, - "linear_nnz": 3922688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2117440, - "linear_dense_total": 4718592, - "linear_nnz": 2584512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48458624, - "linear_sparsity": 42.94599368248457, - "linear_total": 84934656, - "nnz": 72403618, - "total": 108893186, - "total_sparsity": 33.50950536060172 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.82308420056765, - "f1": 88.21300800880684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.25489320373535, - "eval_elapsed_time": 39.64649308426306 - }, - "speedup": 1.1965438162077555, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004864, - "linear_dense_total": 4718592, - "linear_nnz": 4450944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076928, - "linear_dense_total": 4718592, - "linear_nnz": 4674240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517376, - "linear_dense_total": 4718592, - "linear_nnz": 1879424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1063808, - "linear_dense_total": 4718592, - "linear_nnz": 1281024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 800192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155456, - "linear_dense_total": 4718592, - "linear_nnz": 4955648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 948864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165760, - "linear_dense_total": 4718592, - "linear_nnz": 5114624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1019200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152640, - "linear_dense_total": 4718592, - "linear_nnz": 5171840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 915392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4108416, - "linear_dense_total": 4718592, - "linear_nnz": 5023808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 916160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3960384, - "linear_dense_total": 4718592, - "linear_nnz": 4876544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 834176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3685056, - "linear_dense_total": 4718592, - "linear_nnz": 4519232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 713856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207936, - "linear_dense_total": 4718592, - "linear_nnz": 3921792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 465600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115456, - "linear_dense_total": 4718592, - "linear_nnz": 2581056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48450176, - "linear_sparsity": 42.95594015239198, - "linear_total": 84934656, - "nnz": 72395170, - "total": 108893186, - "total_sparsity": 33.51726342179023 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": { - "eval_metrics": { - "exact_match": 80.05676442762535, - "f1": 87.66615713942541 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.86345721435547, - "eval_elapsed_time": 36.22357800696045 - }, - "speedup": 1.3371368758339826, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 326336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3501120, - "linear_dense_total": 4718592, - "linear_nnz": 3827456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 487552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3653568, - "linear_dense_total": 4718592, - "linear_nnz": 4141120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 238208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 756608, - "linear_dense_total": 4718592, - "linear_nnz": 994816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 141568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 622848, - "linear_dense_total": 4718592, - "linear_nnz": 764416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 487616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3801472, - "linear_dense_total": 4718592, - "linear_nnz": 4289088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 712832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3800064, - "linear_dense_total": 4718592, - "linear_nnz": 4512896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 646272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3743872, - "linear_dense_total": 4718592, - "linear_nnz": 4390144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 625600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3691328, - "linear_dense_total": 4718592, - "linear_nnz": 4316928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 575808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3461056, - "linear_dense_total": 4718592, - "linear_nnz": 4036864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 579392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3012928, - "linear_dense_total": 4718592, - "linear_nnz": 3592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 405632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2347776, - "linear_dense_total": 4718592, - "linear_nnz": 2753408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001344, - "linear_dense_total": 4718592, - "linear_nnz": 1318784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 38938240, - "linear_sparsity": 54.1550624517747, - "linear_total": 84934656, - "nnz": 62877338, - "total": 108893186, - "total_sparsity": 42.257784614732465 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": { - "eval_metrics": { - "exact_match": 78.76064333017976, - "f1": 86.75922108224064 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 25.933858947753908, - "eval_elapsed_time": 33.4375456799753 - }, - "speedup": 1.4881855061802785, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 241280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2752704, - "linear_dense_total": 4718592, - "linear_nnz": 2993984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 379584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2951104, - "linear_dense_total": 4718592, - "linear_nnz": 3330688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 172352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419008, - "linear_dense_total": 4718592, - "linear_nnz": 591360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 104768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388288, - "linear_dense_total": 4718592, - "linear_nnz": 493056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 322880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194240, - "linear_dense_total": 4718592, - "linear_nnz": 3517120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 565440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3155136, - "linear_dense_total": 4718592, - "linear_nnz": 3720576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 390400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3064768, - "linear_dense_total": 4718592, - "linear_nnz": 3455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 406592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2993600, - "linear_dense_total": 4718592, - "linear_nnz": 3400192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 356480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2631680, - "linear_dense_total": 4718592, - "linear_nnz": 2988160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 409920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2067776, - "linear_dense_total": 4718592, - "linear_nnz": 2477696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 242048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1370368, - "linear_dense_total": 4718592, - "linear_nnz": 1612416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 224896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 472768, - "linear_dense_total": 4718592, - "linear_nnz": 697664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29278080, - "linear_sparsity": 65.52870008680556, - "linear_total": 84934656, - "nnz": 53211146, - "total": 108893186, - "total_sparsity": 51.13454941064908 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": { - "eval_metrics": { - "exact_match": 80.69063386944181, - "f1": 88.06386432532665 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.390718185424806, - "eval_elapsed_time": 24.534384376835078 - }, - "speedup": 2.2192523962418718, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1703424, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 1291776, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2786304, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2649600, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 3124224, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2449920, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1910784, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 1122816, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23757312, - "linear_sparsity": 67.07467643051771, - "linear_total": 72155136, - "nnz": 47671853, - "total": 96101186, - "total_sparsity": 50.394105437991165 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": { - "eval_metrics": { - "exact_match": 80.23651844843897, - "f1": 87.68464122182475 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.154361824035647, - "eval_elapsed_time": 24.304617804009467 - }, - "speedup": 2.249829716853412, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1477632, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1466880, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 749568, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 2230272, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2671104, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 2241024, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 2088960, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1760256, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1973760, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 1271808, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21573120, - "linear_sparsity": 70.58269101876675, - "linear_total": 73334784, - "nnz": 45486623, - "total": 97281986, - "total_sparsity": 53.24250164876363 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": { - "eval_metrics": { - "exact_match": 79.4228949858089, - "f1": 87.22907143184382 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.848762104034424, - "eval_elapsed_time": 22.048566517885774 - }, - "speedup": 2.5991656903766382, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 1198080, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1379328, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 789504, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1878528, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 2090496, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1747968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1826304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 1084416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18445824, - "linear_sparsity": 73.42200779036827, - "linear_total": 69402624, - "nnz": 42356011, - "total": 93345986, - "total_sparsity": 54.62471091151151 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": { - "eval_metrics": { - "exact_match": 78.82686849574267, - "f1": 86.75497848244157 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.354346725463868, - "eval_elapsed_time": 21.489493974950165 - }, - "speedup": 2.68869031405704, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 967680, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 1041408, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 686592, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1586688, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 2013696, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1872384, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1517568, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1645056, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 1534464, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1056768, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16424448, - "linear_sparsity": 76.46786971830986, - "linear_total": 69795840, - "nnz": 40333447, - "total": 93739586, - "total_sparsity": 56.972876965767696 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": { - "eval_metrics": { - "exact_match": 81.69347209082308, - "f1": 88.72194531479171 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.951393741607667, - "eval_elapsed_time": 28.213609586004168 - }, - "speedup": 1.8420919143305463, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2502144, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2268672, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 1545216, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 1468416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 3293184, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 3325440, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3480576, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2904576, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 2440704, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 1388544, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30818304, - "linear_sparsity": 59.1796875, - "linear_total": 75497472, - "nnz": 54738530, - "total": 99446786, - "total_sparsity": 44.95696422004025 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": { - "eval_metrics": { - "exact_match": 80.86092715231788, - "f1": 88.26868699204444 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.458871116638186, - "eval_elapsed_time": 26.62503844080493 - }, - "speedup": 1.98338294004996, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": { - "eval_metrics": { - "exact_match": 80.87038789025544, - "f1": 88.24613086360249 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.453059474945068, - "eval_elapsed_time": 26.577815205790102 - }, - "speedup": 1.9839754797994356, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "eval_metrics": { - "exact_match": 80.80416272469253, - "f1": 88.20260662536118 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.439563426971436, - "eval_elapsed_time": 25.7331585730426 - }, - "speedup": 2.0930209740713988, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "eval_metrics": { - "exact_match": 80.6717123935667, - "f1": 88.128983727943 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.875869693756105, - "eval_elapsed_time": 26.023085076361895 - }, - "speedup": 2.044641843344449, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "eval_metrics": { - "exact_match": 80.68117313150425, - "f1": 88.11014400914335 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.42703369522095, - "eval_elapsed_time": 25.61402732366696 - }, - "speedup": 2.094444154371984, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "eval_metrics": { - "exact_match": 80.01892147587512, - "f1": 87.70568682399205 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.845825397491456, - "eval_elapsed_time": 23.001069764140993 - }, - "speedup": 2.4356189745395627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.70940223967354 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.838374267578125, - "eval_elapsed_time": 22.999519595876336 - }, - "speedup": 2.436764806371294, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "eval_metrics": { - "exact_match": 78.63765373699148, - "f1": 86.69392512957342 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.783753513336181, - "eval_elapsed_time": 20.85535095212981 - }, - "speedup": 2.799991523936488, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 1268736, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1296384, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 1104384, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 804864, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1440768, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1709568, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1863168, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1628160, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1901568, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 923136, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17043456, - "linear_sparsity": 74.57844574780059, - "linear_total": 67043328, - "nnz": 40951962, - "total": 90984386, - "total_sparsity": 54.990121052199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-47500": { - "eval_metrics": { - "exact_match": 83.74645222327341, - "f1": 90.16320537561052 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53850735473633, - "eval_elapsed_time": 44.58338421070948 - }, - "speedup": 1.0281280670181348, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-55330": { - "eval_metrics": { - "exact_match": 83.62346263008514, - "f1": 90.10843526218638 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.30008307647705, - "eval_elapsed_time": 44.469506811816245 - }, - "speedup": 1.034699920808227, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-22500": { - "eval_metrics": { - "exact_match": 84.399243140965, - "f1": 90.84270784891945 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6732879486084, - "eval_elapsed_time": 48.981834520120174 - }, - "speedup": 0.9261182619659336, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-25000": { - "eval_metrics": { - "exact_match": 84.20056764427625, - "f1": 90.73941291394593 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.50353849792481, - "eval_elapsed_time": 49.06402187002823 - }, - "speedup": 0.929906085171529, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-27665": { - "eval_metrics": { - "exact_match": 84.2100283822138, - "f1": 90.70141124860059 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6272840423584, - "eval_elapsed_time": 49.02150737866759 - }, - "speedup": 0.9271417507348992, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55000": { - "eval_metrics": { - "exact_match": 84.63576158940397, - "f1": 91.0266636723574 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85157574462891, - "eval_elapsed_time": 49.32021534908563 - }, - "speedup": 0.9221729963255725, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55330": { - "eval_metrics": { - "exact_match": 84.65468306527909, - "f1": 91.01004624462917 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85431317138672, - "eval_elapsed_time": 49.428419118281454 - }, - "speedup": 0.922112682803639, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": { - "eval_metrics": { - "exact_match": 82.33680227057711, - "f1": 89.04761607630476 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.50764268493653, - "eval_elapsed_time": 44.93039320781827 - }, - "speedup": 1.0289741034797428, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 974848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1167360, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 306176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 576512, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1714176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2709504, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1875968, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2908160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1832960, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3074048, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2155520, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3335168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1942528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2851840, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2079744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2761728, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1843200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2316288, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1582080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1950720, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1435648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1757184, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 717824, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 988160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 297984, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 584704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 334848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 447488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 358400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 436224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 134144, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 214016, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 293888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 834560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1248256, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 381952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 848896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 406528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 959488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 522240, - "linear_attention_total": 4194304, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1130496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 771072, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 414720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1091584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1839104, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36684800, - "linear_sparsity": 87.85230848524306, - "linear_total": 301989888, - "nnz": 68649433, - "total": 334094338, - "total_sparsity": 79.45208128609471 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": { - "eval_metrics": { - "exact_match": 82.13812677388836, - "f1": 89.03656646065757 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.54432637023926, - "eval_elapsed_time": 44.93571184715256 - }, - "speedup": 1.0279687168915141, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 989184, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1181696, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 323584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 593920, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1745920, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2741248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1902592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2934784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1782784, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3023872, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2147328, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3326976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1917952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2827264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2049024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2731008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1820672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2293760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1562624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1931264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1390592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1712128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 286720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 573440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 801792, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1215488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 863232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 405504, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 520192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1124352, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 764928, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1203200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 423936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1083392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1070080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1817600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36464640, - "linear_sparsity": 87.92521158854166, - "linear_total": 301989888, - "nnz": 68429014, - "total": 334094338, - "total_sparsity": 79.51805636406804 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": { - "eval_metrics": { - "exact_match": 82.30842005676443, - "f1": 89.04987146464723 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53598588562012, - "eval_elapsed_time": 44.935436787083745 - }, - "speedup": 1.028197131226982, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1171456, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 319488, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 589824, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1754112, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2749440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1922048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2954240, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1775616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3016704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2149376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3329024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1954816, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2864128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2065408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2747392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1823744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2296832, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1558528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1927168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1356800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1678336, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 293888, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 580608, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 795648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 395264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 862208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 392192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 945152, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 523264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1127424, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 784384, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1222656, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 416768, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1816576, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36492288, - "linear_sparsity": 87.91605631510416, - "linear_total": 301989888, - "nnz": 68456822, - "total": 334094338, - "total_sparsity": 79.50973296650122 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000": { - "eval_metrics": { - "exact_match": 83.78429517502366, - "f1": 90.32458147221426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.496326583862306, - "eval_elapsed_time": 49.08256564009935 - }, - "speedup": 0.9300676995438012, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 783360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 1618944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1602560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1636352, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 4046848, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1575936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4086784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1203200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2030592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 4635648, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1785856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1946624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3646464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1647616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3050496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1538048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2635776, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2070528, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 607232, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1346560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1665024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 754688, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 284672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 479232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 435200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 626688, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2312192, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 369664, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2137088, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 463872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2337792, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 294912, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2349056, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 613376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2386944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 208896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2177024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 923648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2910208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56885248, - "linear_sparsity": 81.16319444444444, - "linear_total": 301989888, - "nnz": 88857851, - "total": 334094338, - "total_sparsity": 73.40336518962498 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-221320": { - "eval_metrics": { - "exact_match": 83.66130558183538, - "f1": 90.22195941338013 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.275371505737304, - "eval_elapsed_time": 48.98561626393348 - }, - "speedup": 0.9350465325310627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 766976, - "linear_attention_total": 4194304, - "linear_dense_nnz": 831488, - "linear_dense_total": 8388608, - "linear_nnz": 1598464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 338944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1273856, - "linear_dense_total": 8388608, - "linear_nnz": 1612800, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2408448, - "linear_dense_total": 8388608, - "linear_nnz": 4004864, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1615872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2508800, - "linear_dense_total": 8388608, - "linear_nnz": 4124672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1205248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2658304, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2006016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2603008, - "linear_dense_total": 8388608, - "linear_nnz": 4609024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1718272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4018176, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1935360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3635200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1612800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3015680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1502208, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2599936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1167360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2068480, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 601088, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1340416, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1357824, - "linear_dense_total": 8388608, - "linear_nnz": 1662976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 364544, - "linear_attention_total": 4194304, - "linear_dense_nnz": 356352, - "linear_dense_total": 8388608, - "linear_nnz": 720896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 274432, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 468992, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 102400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 423936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 621568, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2307072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 377856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2145280, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 460800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1871872, - "linear_dense_total": 8388608, - "linear_nnz": 2332672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 309248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2363392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 583680, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2357248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 215040, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1966080, - "linear_dense_total": 8388608, - "linear_nnz": 2181120, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 916480, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2903040, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56608768, - "linear_sparsity": 81.25474717881944, - "linear_total": 301989888, - "nnz": 88581359, - "total": 334094338, - "total_sparsity": 73.4861238504437 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - } - } -} \ No newline at end of file diff --git a/analysis/files/test2.json b/analysis/files/test2.json deleted file mode 100644 index 19bc3b8f..00000000 --- a/analysis/files/test2.json +++ /dev/null @@ -1,40488 +0,0 @@ -{ - "base_speed_report": { - "cuda_eval_elapsed_time": 38.594393005371096, - "eval_elapsed_time": 45.63197132572532 - }, - "checkpoints": { - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr1_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.15799432355723, - "f1": 86.94169166073364 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 768, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 22.747020225524903, - "eval_elapsed_time": 29.958857133984566 - }, - "speedup": 1.6966790648941144, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 427776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 2045184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 394752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1708032, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 182784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 245760, - "linear_dense_total": 4718592, - "linear_nnz": 428544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 626688, - "linear_dense_total": 4718592, - "linear_nnz": 738816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1955328, - "linear_dense_total": 4718592, - "linear_nnz": 2424576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 579840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1923072, - "linear_dense_total": 4718592, - "linear_nnz": 2502912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 539904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1837056, - "linear_dense_total": 4718592, - "linear_nnz": 2376960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 424704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1777152, - "linear_dense_total": 4718592, - "linear_nnz": 2201856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1468416, - "linear_dense_total": 4718592, - "linear_nnz": 1907712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 428544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1152000, - "linear_dense_total": 4718592, - "linear_nnz": 1580544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 397824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697344, - "linear_dense_total": 4718592, - "linear_nnz": 1095168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 235776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 291840, - "linear_dense_total": 4718592, - "linear_nnz": 527616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19932672, - "linear_sparsity": 76.53175636574075, - "linear_total": 84934656, - "nnz": 43891202, - "total": 108893186, - "total_sparsity": 59.6933438975695 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.2620624408704, - "f1": 86.97825692623259 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 16.405798454284668, - "eval_elapsed_time": 23.622337056789547 - }, - "speedup": 2.3524848920286154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1493248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 565504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1191936, - "linear_dense_total": 4718592, - "linear_nnz": 1757440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 346368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167424, - "linear_dense_total": 4718592, - "linear_nnz": 513792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 220160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423936, - "linear_dense_total": 4718592, - "linear_nnz": 644096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 646400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1382400, - "linear_dense_total": 4718592, - "linear_nnz": 2028800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 937728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1359360, - "linear_dense_total": 4718592, - "linear_nnz": 2297088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 846592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1423872, - "linear_dense_total": 4718592, - "linear_nnz": 2270464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 688640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2081792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 744704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1070592, - "linear_dense_total": 4718592, - "linear_nnz": 1815296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 831488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 781824, - "linear_dense_total": 4718592, - "linear_nnz": 1613312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 522496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446976, - "linear_dense_total": 4718592, - "linear_nnz": 969472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 594944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18079744, - "linear_sparsity": 78.7133487654321, - "linear_total": 84934656, - "nnz": 42038274, - "total": 108893186, - "total_sparsity": 61.39494531824976 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test/hp_es-steps_nte20_ls250_est5000_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.07723643002453 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 19.890604362487792, - "eval_elapsed_time": 27.08285549096763 - }, - "speedup": 1.9403328477116193, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 721408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2214400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 635136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1755648, - "linear_dense_total": 4718592, - "linear_nnz": 2390784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 484608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 198144, - "linear_dense_total": 4718592, - "linear_nnz": 682752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 313600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 666624, - "linear_dense_total": 4718592, - "linear_nnz": 980224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 972032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1878528, - "linear_dense_total": 4718592, - "linear_nnz": 2850560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1256448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1932288, - "linear_dense_total": 4718592, - "linear_nnz": 3188736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1260544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1889280, - "linear_dense_total": 4718592, - "linear_nnz": 3149824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1784832, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1393152, - "linear_dense_total": 4718592, - "linear_nnz": 2455040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1027584, - "linear_dense_total": 4718592, - "linear_nnz": 2015744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 903424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646656, - "linear_dense_total": 4718592, - "linear_nnz": 1550080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 250368, - "linear_dense_total": 4718592, - "linear_nnz": 886784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25271040, - "linear_sparsity": 70.2464916087963, - "linear_total": 84934656, - "nnz": 49229570, - "total": 108893186, - "total_sparsity": 54.79095450471988 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte12_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.99053926206244, - "f1": 87.56439208763325 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 2, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 19.37784966278076, - "eval_elapsed_time": 26.613120706751943 - }, - "speedup": 1.9916757368336773, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 684800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2007552, - "linear_dense_total": 4718592, - "linear_nnz": 2692352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 646656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2019840, - "linear_dense_total": 4718592, - "linear_nnz": 2666496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 274944, - "linear_dense_total": 4718592, - "linear_nnz": 707072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 277760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 794112, - "linear_dense_total": 4718592, - "linear_nnz": 1071872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 691712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2239488, - "linear_dense_total": 4718592, - "linear_nnz": 2931200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1149184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2211840, - "linear_dense_total": 4718592, - "linear_nnz": 3361024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1007872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2158080, - "linear_dense_total": 4718592, - "linear_nnz": 3165952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 997376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2073600, - "linear_dense_total": 4718592, - "linear_nnz": 3070976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 911872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1732608, - "linear_dense_total": 4718592, - "linear_nnz": 2644480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 944640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304064, - "linear_dense_total": 4718592, - "linear_nnz": 2248704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 763136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 751104, - "linear_dense_total": 4718592, - "linear_nnz": 1514240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 526080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 839424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26913792, - "linear_sparsity": 68.31235532407408, - "linear_total": 84934656, - "nnz": 50872322, - "total": 108893186, - "total_sparsity": 53.282364242699266 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 12, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte20_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl15_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.58426699451658 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 15 - }, - "speed": { - "cuda_eval_elapsed_time": 15.051653835296632, - "eval_elapsed_time": 22.226274209097028 - }, - "speedup": 2.56412972472606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 749568, - "linear_dense_total": 4718592, - "linear_nnz": 1209344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 488192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006080, - "linear_dense_total": 4718592, - "linear_nnz": 1494272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 311040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 460032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 550144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1085952, - "linear_dense_total": 4718592, - "linear_nnz": 1636096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1969664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1198080, - "linear_dense_total": 4718592, - "linear_nnz": 1746944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1128960, - "linear_dense_total": 4718592, - "linear_nnz": 1782272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 593920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867840, - "linear_dense_total": 4718592, - "linear_nnz": 1461760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669696, - "linear_dense_total": 4718592, - "linear_nnz": 1391616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 387072, - "linear_dense_total": 4718592, - "linear_nnz": 754688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 373760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 531968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14957824, - "linear_sparsity": 82.38902150848766, - "linear_total": 84934656, - "nnz": 38916354, - "total": 108893186, - "total_sparsity": 64.26190156654981 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test2/hp_od-output__squad_test2_es-steps_nte30_ls250_est5000_rn-output__squad_test2_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw15_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-165000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.96877956480606, - "f1": 86.71968503618079 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 15, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 15.359982524871826, - "eval_elapsed_time": 22.516427854076028 - }, - "speedup": 2.512658653281453, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1010688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 518912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 852480, - "linear_dense_total": 4718592, - "linear_nnz": 1371392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110592, - "linear_dense_total": 4718592, - "linear_nnz": 455936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 212992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 608768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 981504, - "linear_dense_total": 4718592, - "linear_nnz": 1590272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 869888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1895936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 775936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1093632, - "linear_dense_total": 4718592, - "linear_nnz": 1869568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 618752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044480, - "linear_dense_total": 4718592, - "linear_nnz": 1663232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 754176, - "linear_dense_total": 4718592, - "linear_nnz": 1383424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 707584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 1295872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 463104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 808704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 376064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 515840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14334976, - "linear_sparsity": 83.1223476080247, - "linear_total": 84934656, - "nnz": 38293506, - "total": 108893186, - "total_sparsity": 64.83388225963009 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test2", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 30, - "output_dir": "output/squad_test2", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test2", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-100000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.74172185430463, - "f1": 86.69521763053608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.306304389953613, - "eval_elapsed_time": 24.480814102105796 - }, - "speedup": 2.230077094204775, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 468992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 606208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1185792, - "linear_dense_total": 4718592, - "linear_nnz": 1792000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 378112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 525568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419328, - "linear_dense_total": 4718592, - "linear_nnz": 626688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 625664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1348608, - "linear_dense_total": 4718592, - "linear_nnz": 1974272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 910592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1320960, - "linear_dense_total": 4718592, - "linear_nnz": 2231552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 828672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380864, - "linear_dense_total": 4718592, - "linear_nnz": 2209536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 765440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 2046464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1003008, - "linear_dense_total": 4718592, - "linear_nnz": 1764096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 792832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 780288, - "linear_dense_total": 4718592, - "linear_nnz": 1573120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 986880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 389888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 572672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17711872, - "linear_sparsity": 79.14647231867285, - "linear_total": 84934656, - "nnz": 41670402, - "total": 108893186, - "total_sparsity": 61.73277361909495 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr16_abc16_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.84578997161779, - "f1": 86.78133258210022 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.32754041290283, - "eval_elapsed_time": 24.51584801170975 - }, - "speedup": 2.2273439903006693, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 465664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 938496, - "linear_dense_total": 4718592, - "linear_nnz": 1404160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 584192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1182720, - "linear_dense_total": 4718592, - "linear_nnz": 1766912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 370432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 145920, - "linear_dense_total": 4718592, - "linear_nnz": 516352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 615680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 615680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1345536, - "linear_dense_total": 4718592, - "linear_nnz": 1961216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 895488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1314816, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 812032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1377792, - "linear_dense_total": 4718592, - "linear_nnz": 2189824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 755456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1282560, - "linear_dense_total": 4718592, - "linear_nnz": 2038016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 739840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 998400, - "linear_dense_total": 4718592, - "linear_nnz": 1738240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 797440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 774144, - "linear_dense_total": 4718592, - "linear_nnz": 1571584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430080, - "linear_dense_total": 4718592, - "linear_nnz": 943872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 563968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17520128, - "linear_sparsity": 79.37222704475309, - "linear_total": 84934656, - "nnz": 41478658, - "total": 108893186, - "total_sparsity": 61.90885809879785 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-10000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.90823084200568, - "f1": 88.13888839423888 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 40.403957000732426, - "eval_elapsed_time": 47.70582241564989 - }, - "speedup": 0.9552131986644643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2151936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4713984, - "linear_dense_total": 4718592, - "linear_nnz": 6865920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2299648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4709376, - "linear_dense_total": 4718592, - "linear_nnz": 7009024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4638720, - "linear_dense_total": 4718592, - "linear_nnz": 6924288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2312448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4687872, - "linear_dense_total": 4718592, - "linear_nnz": 7000320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4707840, - "linear_dense_total": 4718592, - "linear_nnz": 7037952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2330112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4710912, - "linear_dense_total": 4718592, - "linear_nnz": 7041024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2324992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4704768, - "linear_dense_total": 4718592, - "linear_nnz": 7029760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2337280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4706304, - "linear_dense_total": 4718592, - "linear_nnz": 7043584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2321664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4684800, - "linear_dense_total": 4718592, - "linear_nnz": 7006464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2342400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4683264, - "linear_dense_total": 4718592, - "linear_nnz": 7025664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2296576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4654080, - "linear_dense_total": 4718592, - "linear_nnz": 6950656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2259200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6905600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 83840256, - "linear_sparsity": 1.288519965277779, - "linear_total": 84934656, - "nnz": 107798786, - "total": 108893186, - "total_sparsity": 1.005021563057218 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.99999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.21192052980132, - "f1": 86.2154189083501 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.99999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.830447120666506, - "eval_elapsed_time": 47.13309640903026 - }, - "speedup": 0.968967104196677, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1914624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4678656, - "linear_dense_total": 4718592, - "linear_nnz": 6593280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2103296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4669440, - "linear_dense_total": 4718592, - "linear_nnz": 6772736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2053632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4353024, - "linear_dense_total": 4718592, - "linear_nnz": 6406656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2100480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4538880, - "linear_dense_total": 4718592, - "linear_nnz": 6639360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2239232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4646400, - "linear_dense_total": 4718592, - "linear_nnz": 6885632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2219520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6876672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2216448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4657152, - "linear_dense_total": 4718592, - "linear_nnz": 6873600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2226176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4615680, - "linear_dense_total": 4718592, - "linear_nnz": 6841856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2190848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4595712, - "linear_dense_total": 4718592, - "linear_nnz": 6786560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2261760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4549632, - "linear_dense_total": 4718592, - "linear_nnz": 6811392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2178048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4431360, - "linear_dense_total": 4718592, - "linear_nnz": 6609408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2049792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4349952, - "linear_dense_total": 4718592, - "linear_nnz": 6399744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 80496896, - "linear_sparsity": 5.224910783179015, - "linear_total": 84934656, - "nnz": 104455426, - "total": 108893186, - "total_sparsity": 4.075333051601593 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl14.9999_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-5000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.7038789025544, - "f1": 86.6699349353281 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": true, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 14.9999 - }, - "speed": { - "cuda_eval_elapsed_time": 39.58176746368408, - "eval_elapsed_time": 46.91258597606793 - }, - "speedup": 0.975054816356574, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 2354176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7072768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4715520, - "linear_dense_total": 4718592, - "linear_nnz": 7074816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4717056, - "linear_dense_total": 4718592, - "linear_nnz": 7076352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7077888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 2358272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4718592, - "linear_dense_total": 4718592, - "linear_nnz": 7076864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 84922880, - "linear_sparsity": 0.013864776234573384, - "linear_total": 84934656, - "nnz": 108881410, - "total": 108893186, - "total_sparsity": 0.010814267111258768 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.94701986754967, - "f1": 86.06827252573265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 14.216132064819336, - "eval_elapsed_time": 21.342612544074655 - }, - "speedup": 2.7148307872632, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 439296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1044480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 809472, - "linear_dense_total": 4718592, - "linear_nnz": 1177088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 276224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 411392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 178176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 251904, - "linear_dense_total": 4718592, - "linear_nnz": 430080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 492032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 958464, - "linear_dense_total": 4718592, - "linear_nnz": 1450496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 733696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 918528, - "linear_dense_total": 4718592, - "linear_nnz": 1652224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 461056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1050624, - "linear_dense_total": 4718592, - "linear_nnz": 1511680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 580096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 953856, - "linear_dense_total": 4718592, - "linear_nnz": 1533952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 764928, - "linear_dense_total": 4718592, - "linear_nnz": 1227520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 624384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 1195776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 351744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348672, - "linear_dense_total": 4718592, - "linear_nnz": 700416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 339968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 479744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12814848, - "linear_sparsity": 84.912109375, - "linear_total": 84934656, - "nnz": 36773378, - "total": 108893186, - "total_sparsity": 66.22986308803564 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_abr32_abc32_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.06717123935667, - "f1": 85.28341140334766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 13.584790561676026, - "eval_elapsed_time": 20.705443068873137 - }, - "speedup": 2.8410002222816386, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 384768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 502272, - "linear_dense_total": 4718592, - "linear_nnz": 887040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 355840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1057792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 115200, - "linear_dense_total": 4718592, - "linear_nnz": 371712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 150016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 221184, - "linear_dense_total": 4718592, - "linear_nnz": 371200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1285888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 672256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1497088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 418560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 932352, - "linear_dense_total": 4718592, - "linear_nnz": 1350912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 872448, - "linear_dense_total": 4718592, - "linear_nnz": 1395712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 498944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655872, - "linear_dense_total": 4718592, - "linear_nnz": 1154816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 562176, - "linear_dense_total": 4718592, - "linear_nnz": 1059840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 297216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 609024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 316416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 119808, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11477248, - "linear_sparsity": 86.4869671103395, - "linear_total": 84934656, - "nnz": 35435778, - "total": 108893186, - "total_sparsity": 67.45822277621669 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.80132450331126, - "f1": 87.48291010744668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.270113506317138, - "eval_elapsed_time": 25.450434973929077 - }, - "speedup": 2.1124331270315624, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 627712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1281024, - "linear_dense_total": 4718592, - "linear_nnz": 1908736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1548288, - "linear_dense_total": 4718592, - "linear_nnz": 2145280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 182784, - "linear_dense_total": 4718592, - "linear_nnz": 634368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 268288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 827392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1709568, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1180672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1740288, - "linear_dense_total": 4718592, - "linear_nnz": 2920960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1204224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2906112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600512, - "linear_dense_total": 4718592, - "linear_nnz": 2516992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 909312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1242624, - "linear_dense_total": 4718592, - "linear_nnz": 2151936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 917504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 972288, - "linear_dense_total": 4718592, - "linear_nnz": 1889792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 856064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 542208, - "linear_dense_total": 4718592, - "linear_nnz": 1398272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 611328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 247296, - "linear_dense_total": 4718592, - "linear_nnz": 858624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22657536, - "linear_sparsity": 73.32356770833333, - "linear_total": 84934656, - "nnz": 46572775, - "total": 108893186, - "total_sparsity": 57.23077199706509 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.27436140018922, - "f1": 87.70461789964966 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 18.375184078216552, - "eval_elapsed_time": 25.600778602063656 - }, - "speedup": 2.1003540884863323, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 1984512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 592896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2164224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 667648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 869376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 880640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 2625536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1230848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 2992640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1214464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 2940928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 906240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2535936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2213376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 935936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 1923584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 872448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 1419264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 883712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23220736, - "linear_sparsity": 72.66046971450618, - "linear_total": 84934656, - "nnz": 47136529, - "total": 108893186, - "total_sparsity": 56.713059162397904 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.20529801324503, - "f1": 87.11181141207972 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 17.401466148376464, - "eval_elapsed_time": 24.569451212882996 - }, - "speedup": 2.2178816817094407, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 838656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287232, - "linear_dense_total": 4718592, - "linear_nnz": 1125888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 692224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1188352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 573952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 293888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 155136, - "linear_dense_total": 4718592, - "linear_nnz": 449024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1089536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 605184, - "linear_dense_total": 4718592, - "linear_nnz": 1694720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1291264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 671232, - "linear_dense_total": 4718592, - "linear_nnz": 1962496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1384448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 728064, - "linear_dense_total": 4718592, - "linear_nnz": 2112512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1121280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 662016, - "linear_dense_total": 4718592, - "linear_nnz": 1783296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1127424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 505344, - "linear_dense_total": 4718592, - "linear_nnz": 1632768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 942080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 391680, - "linear_dense_total": 4718592, - "linear_nnz": 1333760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 982016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 222720, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 84480, - "linear_dense_total": 4718592, - "linear_nnz": 729600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15791104, - "linear_sparsity": 81.40793788580247, - "linear_total": 84934656, - "nnz": 39702836, - "total": 108893186, - "total_sparsity": 63.53965068117302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.80794701986756, - "f1": 86.74156854566804 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 15.618790004730226, - "eval_elapsed_time": 22.811819266993552 - }, - "speedup": 2.471023235070233, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 518144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1344512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090560, - "linear_dense_total": 4718592, - "linear_nnz": 1606656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 324608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 209920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 345600, - "linear_dense_total": 4718592, - "linear_nnz": 555520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1204224, - "linear_dense_total": 4718592, - "linear_nnz": 1842176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1184256, - "linear_dense_total": 4718592, - "linear_nnz": 2097664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 2056192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 664576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1201152, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 629760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 935424, - "linear_dense_total": 4718592, - "linear_nnz": 1565184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1486336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 415744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 428544, - "linear_dense_total": 4718592, - "linear_nnz": 844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 592896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16329216, - "linear_sparsity": 80.7743778935185, - "linear_total": 84934656, - "nnz": 40239113, - "total": 108893186, - "total_sparsity": 63.04717083032174 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.18070009460737, - "f1": 85.6109462422114 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 13.485522186279297, - "eval_elapsed_time": 20.651509277056903 - }, - "speedup": 2.86191312967017, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 907264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1074176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 256000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 377344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 146432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 361472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 402432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 681984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1508352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 405504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1328640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1422848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 449536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1094656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 577536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1102848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 294912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 628224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 320512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 434176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11493376, - "linear_sparsity": 86.46797839506173, - "linear_total": 84934656, - "nnz": 35398714, - "total": 108893186, - "total_sparsity": 67.49225980035152 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl150_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.4484389782403, - "f1": 86.3547925481507 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 150 - }, - "speed": { - "cuda_eval_elapsed_time": 29.783737594604492, - "eval_elapsed_time": 37.12324417894706 - }, - "speedup": 1.2958210124830911, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 30729, - "linear_attention_total": 2359296, - "linear_dense_nnz": 624455, - "linear_dense_total": 4718592, - "linear_nnz": 655184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 77742, - "linear_attention_total": 2359296, - "linear_dense_nnz": 655389, - "linear_dense_total": 4718592, - "linear_nnz": 733131, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 27892, - "linear_attention_total": 2359296, - "linear_dense_nnz": 61389, - "linear_dense_total": 4718592, - "linear_nnz": 89281, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 20781, - "linear_attention_total": 2359296, - "linear_dense_nnz": 51322, - "linear_dense_total": 4718592, - "linear_nnz": 72103, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 70206, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660173, - "linear_dense_total": 4718592, - "linear_nnz": 730379, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 106339, - "linear_attention_total": 2359296, - "linear_dense_nnz": 628112, - "linear_dense_total": 4718592, - "linear_nnz": 734451, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 81845, - "linear_attention_total": 2359296, - "linear_dense_nnz": 574018, - "linear_dense_total": 4718592, - "linear_nnz": 655863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 68554, - "linear_attention_total": 2359296, - "linear_dense_nnz": 537752, - "linear_dense_total": 4718592, - "linear_nnz": 606306, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 58217, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434629, - "linear_dense_total": 4718592, - "linear_nnz": 492846, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 65705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313684, - "linear_dense_total": 4718592, - "linear_nnz": 379389, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 39483, - "linear_attention_total": 2359296, - "linear_dense_nnz": 203724, - "linear_dense_total": 4718592, - "linear_nnz": 243207, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 46007, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73599, - "linear_dense_total": 4718592, - "linear_nnz": 119606, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 5511746, - "linear_sparsity": 93.51060419906804, - "linear_total": 84934656, - "nnz": 29470276, - "total": 108893186, - "total_sparsity": 72.93652882926945 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl225_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.39829706717124, - "f1": 85.66626983371626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 225 - }, - "speed": { - "cuda_eval_elapsed_time": 27.713626304626466, - "eval_elapsed_time": 35.06419681990519 - }, - "speedup": 1.3926143255719736, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 18728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 446655, - "linear_dense_total": 4718592, - "linear_nnz": 465383, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 63059, - "linear_attention_total": 2359296, - "linear_dense_nnz": 464338, - "linear_dense_total": 4718592, - "linear_nnz": 527397, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 21311, - "linear_attention_total": 2359296, - "linear_dense_nnz": 43332, - "linear_dense_total": 4718592, - "linear_nnz": 64643, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 17233, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36806, - "linear_dense_total": 4718592, - "linear_nnz": 54039, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 53761, - "linear_attention_total": 2359296, - "linear_dense_nnz": 462731, - "linear_dense_total": 4718592, - "linear_nnz": 516492, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 84624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 430348, - "linear_dense_total": 4718592, - "linear_nnz": 514972, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 58345, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384869, - "linear_dense_total": 4718592, - "linear_nnz": 443214, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 50615, - "linear_attention_total": 2359296, - "linear_dense_nnz": 346306, - "linear_dense_total": 4718592, - "linear_nnz": 396921, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 41344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 277660, - "linear_dense_total": 4718592, - "linear_nnz": 319004, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 47420, - "linear_attention_total": 2359296, - "linear_dense_nnz": 201763, - "linear_dense_total": 4718592, - "linear_nnz": 249183, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 27562, - "linear_attention_total": 2359296, - "linear_dense_nnz": 133500, - "linear_dense_total": 4718592, - "linear_nnz": 161062, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 34151, - "linear_attention_total": 2359296, - "linear_dense_nnz": 47554, - "linear_dense_total": 4718592, - "linear_nnz": 81705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 3794015, - "linear_sparsity": 95.5330189363456, - "linear_total": 84934656, - "nnz": 27752545, - "total": 108893186, - "total_sparsity": 74.51397463933142 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.40018921475875, - "f1": 88.66263407974378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 25 - }, - "speed": { - "cuda_eval_elapsed_time": 37.63941863250732, - "eval_elapsed_time": 44.979358388110995 - }, - "speedup": 1.0253716557683228, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 158912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1993831, - "linear_dense_total": 4718592, - "linear_nnz": 2152743, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 234395, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2030737, - "linear_dense_total": 4718592, - "linear_nnz": 2265132, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 134277, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440264, - "linear_dense_total": 4718592, - "linear_nnz": 574541, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 63309, - "linear_attention_total": 2359296, - "linear_dense_nnz": 269756, - "linear_dense_total": 4718592, - "linear_nnz": 333065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 301048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2114464, - "linear_dense_total": 4718592, - "linear_nnz": 2415512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 358791, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2106776, - "linear_dense_total": 4718592, - "linear_nnz": 2465567, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 398673, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2058594, - "linear_dense_total": 4718592, - "linear_nnz": 2457267, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 367333, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2043244, - "linear_dense_total": 4718592, - "linear_nnz": 2410577, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 344288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1862492, - "linear_dense_total": 4718592, - "linear_nnz": 2206780, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 304514, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514517, - "linear_dense_total": 4718592, - "linear_nnz": 1819031, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 265513, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1099308, - "linear_dense_total": 4718592, - "linear_nnz": 1364821, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 201714, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627276, - "linear_dense_total": 4718592, - "linear_nnz": 828990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21294026, - "linear_sparsity": 74.92893124804085, - "linear_total": 84934656, - "nnz": 45252556, - "total": 108893186, - "total_sparsity": 58.4431701722824 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl300_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.40699359564026 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 300 - }, - "speed": { - "cuda_eval_elapsed_time": 25.440285942077637, - "eval_elapsed_time": 32.748252402991056 - }, - "speedup": 1.5170581452285046, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 13195, - "linear_attention_total": 2359296, - "linear_dense_nnz": 344662, - "linear_dense_total": 4718592, - "linear_nnz": 357857, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 53357, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352125, - "linear_dense_total": 4718592, - "linear_nnz": 405482, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 18747, - "linear_attention_total": 2359296, - "linear_dense_nnz": 34723, - "linear_dense_total": 4718592, - "linear_nnz": 53470, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 15957, - "linear_attention_total": 2359296, - "linear_dense_nnz": 30412, - "linear_dense_total": 4718592, - "linear_nnz": 46369, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 43981, - "linear_attention_total": 2359296, - "linear_dense_nnz": 351138, - "linear_dense_total": 4718592, - "linear_nnz": 395119, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 71058, - "linear_attention_total": 2359296, - "linear_dense_nnz": 323059, - "linear_dense_total": 4718592, - "linear_nnz": 394117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 47705, - "linear_attention_total": 2359296, - "linear_dense_nnz": 287668, - "linear_dense_total": 4718592, - "linear_nnz": 335373, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 40348, - "linear_attention_total": 2359296, - "linear_dense_nnz": 252178, - "linear_dense_total": 4718592, - "linear_nnz": 292526, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 33002, - "linear_attention_total": 2359296, - "linear_dense_nnz": 205112, - "linear_dense_total": 4718592, - "linear_nnz": 238114, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 38753, - "linear_attention_total": 2359296, - "linear_dense_nnz": 150138, - "linear_dense_total": 4718592, - "linear_nnz": 188891, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 22052, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101313, - "linear_dense_total": 4718592, - "linear_nnz": 123365, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 28498, - "linear_attention_total": 2359296, - "linear_dense_nnz": 35917, - "linear_dense_total": 4718592, - "linear_nnz": 64415, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 2895098, - "linear_sparsity": 96.59138196780358, - "linear_total": 84934656, - "nnz": 26853628, - "total": 108893186, - "total_sparsity": 75.33947808267818 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 88.07603620459668 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.31425653076172, - "eval_elapsed_time": 42.675803440622985 - }, - "speedup": 1.092884200230921, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 79341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1362813, - "linear_dense_total": 4718592, - "linear_nnz": 1442154, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 146964, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1411011, - "linear_dense_total": 4718592, - "linear_nnz": 1557975, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 70746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 191871, - "linear_dense_total": 4718592, - "linear_nnz": 262617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 36271, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137408, - "linear_dense_total": 4718592, - "linear_nnz": 173679, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 173655, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1463754, - "linear_dense_total": 4718592, - "linear_nnz": 1637409, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 213353, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1442359, - "linear_dense_total": 4718592, - "linear_nnz": 1655712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 221518, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1380230, - "linear_dense_total": 4718592, - "linear_nnz": 1601748, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 179373, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360274, - "linear_dense_total": 4718592, - "linear_nnz": 1539647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 168393, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1183896, - "linear_dense_total": 4718592, - "linear_nnz": 1352289, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 159612, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906603, - "linear_dense_total": 4718592, - "linear_nnz": 1066215, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 127230, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600693, - "linear_dense_total": 4718592, - "linear_nnz": 727923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 105257, - "linear_attention_total": 2359296, - "linear_dense_nnz": 285690, - "linear_dense_total": 4718592, - "linear_nnz": 390947, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13408315, - "linear_sparsity": 84.21337575088313, - "linear_total": 84934656, - "nnz": 37366845, - "total": 108893186, - "total_sparsity": 65.68486388119823 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl50_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.22705771050141, - "f1": 88.08154392563726 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 50 - }, - "speed": { - "cuda_eval_elapsed_time": 35.30916271209717, - "eval_elapsed_time": 42.719326278194785 - }, - "speedup": 1.0930418633843273, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 87221, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1434572, - "linear_dense_total": 4718592, - "linear_nnz": 1521793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 157517, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1480327, - "linear_dense_total": 4718592, - "linear_nnz": 1637844, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 75446, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204546, - "linear_dense_total": 4718592, - "linear_nnz": 279992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 38439, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144390, - "linear_dense_total": 4718592, - "linear_nnz": 182829, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 188172, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1535574, - "linear_dense_total": 4718592, - "linear_nnz": 1723746, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 230341, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1512620, - "linear_dense_total": 4718592, - "linear_nnz": 1742961, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 240387, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1447041, - "linear_dense_total": 4718592, - "linear_nnz": 1687428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 195780, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427597, - "linear_dense_total": 4718592, - "linear_nnz": 1623377, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 184963, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245019, - "linear_dense_total": 4718592, - "linear_nnz": 1429982, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 172954, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957245, - "linear_dense_total": 4718592, - "linear_nnz": 1130199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 138133, - "linear_attention_total": 2359296, - "linear_dense_nnz": 635763, - "linear_dense_total": 4718592, - "linear_nnz": 773896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 112972, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304891, - "linear_dense_total": 4718592, - "linear_nnz": 417863, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14151910, - "linear_sparsity": 83.3378850677867, - "linear_total": 84934656, - "nnz": 38110440, - "total": 108893186, - "total_sparsity": 65.00199746198996 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.89593188268685, - "f1": 87.64967103979136 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 32.98558323669434, - "eval_elapsed_time": 40.38167083170265 - }, - "speedup": 1.170038217254783, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56754, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1054479, - "linear_dense_total": 4718592, - "linear_nnz": 1111233, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 116764, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106103, - "linear_dense_total": 4718592, - "linear_nnz": 1222867, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50915, - "linear_attention_total": 2359296, - "linear_dense_nnz": 121878, - "linear_dense_total": 4718592, - "linear_nnz": 172793, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28303, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94314, - "linear_dense_total": 4718592, - "linear_nnz": 122617, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 127558, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1136881, - "linear_dense_total": 4718592, - "linear_nnz": 1264439, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 163709, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106395, - "linear_dense_total": 4718592, - "linear_nnz": 1270104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 158018, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044282, - "linear_dense_total": 4718592, - "linear_nnz": 1202300, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 125746, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1010449, - "linear_dense_total": 4718592, - "linear_nnz": 1136195, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 110023, - "linear_attention_total": 2359296, - "linear_dense_nnz": 861094, - "linear_dense_total": 4718592, - "linear_nnz": 971117, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 113086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632989, - "linear_dense_total": 4718592, - "linear_nnz": 746075, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 81879, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407092, - "linear_dense_total": 4718592, - "linear_nnz": 488971, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 77365, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173330, - "linear_dense_total": 4718592, - "linear_nnz": 250695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9959406, - "linear_sparsity": 88.27403739646628, - "linear_total": 84934656, - "nnz": 33917936, - "total": 108893186, - "total_sparsity": 68.85210429971255 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_abr1_abc1_it0_fw10_r-l1_rfl75_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.8391674550615, - "f1": 87.59923644792065 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_pruning_method": "sigmoied_threshold", - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 75 - }, - "speed": { - "cuda_eval_elapsed_time": 33.06226232147217, - "eval_elapsed_time": 40.42444095481187 - }, - "speedup": 1.1673246261888772, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 56086, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1044542, - "linear_dense_total": 4718592, - "linear_nnz": 1100628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 115328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096450, - "linear_dense_total": 4718592, - "linear_nnz": 1211778, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 50374, - "linear_attention_total": 2359296, - "linear_dense_nnz": 120861, - "linear_dense_total": 4718592, - "linear_nnz": 171235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 28038, - "linear_attention_total": 2359296, - "linear_dense_nnz": 93754, - "linear_dense_total": 4718592, - "linear_nnz": 121792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 125881, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1127188, - "linear_dense_total": 4718592, - "linear_nnz": 1253069, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 161525, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096986, - "linear_dense_total": 4718592, - "linear_nnz": 1258511, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 155911, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1035794, - "linear_dense_total": 4718592, - "linear_nnz": 1191705, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 123921, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001507, - "linear_dense_total": 4718592, - "linear_nnz": 1125428, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 108430, - "linear_attention_total": 2359296, - "linear_dense_nnz": 853489, - "linear_dense_total": 4718592, - "linear_nnz": 961919, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 111505, - "linear_attention_total": 2359296, - "linear_dense_nnz": 627123, - "linear_dense_total": 4718592, - "linear_nnz": 738628, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 80805, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403383, - "linear_dense_total": 4718592, - "linear_nnz": 484188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 76456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 171492, - "linear_dense_total": 4718592, - "linear_nnz": 247948, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 9866829, - "linear_sparsity": 88.38303530657733, - "linear_total": 84934656, - "nnz": 33825359, - "total": 108893186, - "total_sparsity": 68.93712063856779 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 5, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.81362346263009, - "f1": 88.10463591853348 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.2810027923584, - "eval_elapsed_time": 34.61669071530923 - }, - "speedup": 1.4146984734806616, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4032512, - "linear_dense_total": 4718592, - "linear_nnz": 4676608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4172800, - "linear_dense_total": 4718592, - "linear_nnz": 4756480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 445440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 890880, - "linear_dense_total": 4718592, - "linear_nnz": 1336320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 272384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 875520, - "linear_dense_total": 4718592, - "linear_nnz": 1147904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4324352, - "linear_dense_total": 4718592, - "linear_nnz": 5113856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1028096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4392960, - "linear_dense_total": 4718592, - "linear_nnz": 5421056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1067008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4359168, - "linear_dense_total": 4718592, - "linear_nnz": 5426176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 943104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4333568, - "linear_dense_total": 4718592, - "linear_nnz": 5276672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1003520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4161536, - "linear_dense_total": 4718592, - "linear_nnz": 5165056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4797440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 868352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3021824, - "linear_dense_total": 4718592, - "linear_nnz": 3890176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 520192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48734208, - "linear_sparsity": 42.62152777777778, - "linear_total": 84934656, - "nnz": 72671586, - "total": 108893186, - "total_sparsity": 33.26342201062975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.53926206244087, - "f1": 87.95145431777735 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.25869842529297, - "eval_elapsed_time": 34.5833341376856 - }, - "speedup": 1.4158560472410484, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4140032, - "linear_dense_total": 4718592, - "linear_nnz": 4754432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4246528, - "linear_dense_total": 4718592, - "linear_nnz": 4843520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 881664, - "linear_dense_total": 4718592, - "linear_nnz": 1332224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 266240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863232, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 788480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4357120, - "linear_dense_total": 4718592, - "linear_nnz": 5145600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1061888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4426752, - "linear_dense_total": 4718592, - "linear_nnz": 5488640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4414464, - "linear_dense_total": 4718592, - "linear_nnz": 5463040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 918528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4399104, - "linear_dense_total": 4718592, - "linear_nnz": 5317632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 998400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4232192, - "linear_dense_total": 4718592, - "linear_nnz": 5230592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939328, - "linear_dense_total": 4718592, - "linear_nnz": 4838400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 819200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3028992, - "linear_dense_total": 4718592, - "linear_nnz": 3848192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 516096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1104896, - "linear_dense_total": 4718592, - "linear_nnz": 1620992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 49012736, - "linear_sparsity": 42.29359567901234, - "linear_total": 84934656, - "nnz": 72950082, - "total": 108893186, - "total_sparsity": 33.00767047076757 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.72847682119205, - "f1": 88.08831525592305 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 27.293812591552737, - "eval_elapsed_time": 34.635603360366076 - }, - "speedup": 1.4140345133503194, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4035584, - "linear_dense_total": 4718592, - "linear_nnz": 4657152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 604160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155392, - "linear_dense_total": 4718592, - "linear_nnz": 4759552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 486400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 957440, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 891904, - "linear_dense_total": 4718592, - "linear_nnz": 1178624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4284416, - "linear_dense_total": 4718592, - "linear_nnz": 5065728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1068032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4340736, - "linear_dense_total": 4718592, - "linear_nnz": 5408768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1087488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4237312, - "linear_dense_total": 4718592, - "linear_nnz": 5324800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 908288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4281344, - "linear_dense_total": 4718592, - "linear_nnz": 5189632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1019904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4149248, - "linear_dense_total": 4718592, - "linear_nnz": 5169152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3827712, - "linear_dense_total": 4718592, - "linear_nnz": 4749312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 851968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3080192, - "linear_dense_total": 4718592, - "linear_nnz": 3932160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278976, - "linear_dense_total": 4718592, - "linear_nnz": 1808384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48687104, - "linear_sparsity": 42.67698688271605, - "linear_total": 84934656, - "nnz": 72624802, - "total": 108893186, - "total_sparsity": 33.306385213120684 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 87.91705961229685 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 26.4900548248291, - "eval_elapsed_time": 33.8130349079147 - }, - "speedup": 1.4569389629649467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3140608, - "linear_dense_total": 4718592, - "linear_nnz": 3775488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 602112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3477504, - "linear_dense_total": 4718592, - "linear_nnz": 4079616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 456704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 494592, - "linear_dense_total": 4718592, - "linear_nnz": 951296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 541696, - "linear_dense_total": 4718592, - "linear_nnz": 831488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1008640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3929088, - "linear_dense_total": 4718592, - "linear_nnz": 4937728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1197056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4089856, - "linear_dense_total": 4718592, - "linear_nnz": 5286912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1181696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3953664, - "linear_dense_total": 4718592, - "linear_nnz": 5135360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1005568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006912, - "linear_dense_total": 4718592, - "linear_nnz": 5012480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1043456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3677184, - "linear_dense_total": 4718592, - "linear_nnz": 4720640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 931840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2777088, - "linear_dense_total": 4718592, - "linear_nnz": 3708928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 862208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1448960, - "linear_dense_total": 4718592, - "linear_nnz": 2311168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 458752, - "linear_dense_total": 4718592, - "linear_nnz": 1058816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 41809920, - "linear_sparsity": 50.774016203703695, - "linear_total": 84934656, - "nnz": 65744386, - "total": 108893186, - "total_sparsity": 39.6248852522324 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.82024597918638, - "f1": 87.30735739624531 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.124949531555178, - "eval_elapsed_time": 31.406295038294047 - }, - "speedup": 1.599770932365684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 889856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492992, - "linear_dense_total": 4718592, - "linear_nnz": 2382848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 717824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1850368, - "linear_dense_total": 4718592, - "linear_nnz": 2568192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328704, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 331776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388096, - "linear_dense_total": 4718592, - "linear_nnz": 719872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2802688, - "linear_dense_total": 4718592, - "linear_nnz": 3915776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1297408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2961408, - "linear_dense_total": 4718592, - "linear_nnz": 4258816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1402880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2897920, - "linear_dense_total": 4718592, - "linear_nnz": 4300800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1157120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2873344, - "linear_dense_total": 4718592, - "linear_nnz": 4030464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2473984, - "linear_dense_total": 4718592, - "linear_nnz": 3661824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 979968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1527808, - "linear_dense_total": 4718592, - "linear_nnz": 2507776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 952320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 610304, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 642048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 223232, - "linear_dense_total": 4718592, - "linear_nnz": 865280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31592448, - "linear_sparsity": 62.80381944444444, - "linear_total": 84934656, - "nnz": 55520034, - "total": 108893186, - "total_sparsity": 49.0142257386059 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl10_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.90539262062441, - "f1": 87.36378709007766 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 24.748493873596193, - "eval_elapsed_time": 32.03074289299548 - }, - "speedup": 1.559464313363606, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635328, - "linear_dense_total": 4718592, - "linear_nnz": 2584576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 750592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2048000, - "linear_dense_total": 4718592, - "linear_nnz": 2798592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 352256, - "linear_dense_total": 4718592, - "linear_nnz": 862208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 420864, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1123328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2895872, - "linear_dense_total": 4718592, - "linear_nnz": 4019200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1306624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2967552, - "linear_dense_total": 4718592, - "linear_nnz": 4274176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1475584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3105792, - "linear_dense_total": 4718592, - "linear_nnz": 4581376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1285120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2934784, - "linear_dense_total": 4718592, - "linear_nnz": 4219904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1235968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2500608, - "linear_dense_total": 4718592, - "linear_nnz": 3736576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 983040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1604608, - "linear_dense_total": 4718592, - "linear_nnz": 2587648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 661504, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 230400, - "linear_dense_total": 4718592, - "linear_nnz": 880640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 32956416, - "linear_sparsity": 61.19791666666667, - "linear_total": 84934656, - "nnz": 56885634, - "total": 108893186, - "total_sparsity": 47.76015277944021 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.29990539262063, - "f1": 87.09851869948527 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.736273986816407, - "eval_elapsed_time": 32.05209435708821 - }, - "speedup": 1.5602346992898202, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3907584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3661824, - "linear_dense_total": 4718592, - "linear_nnz": 4186112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 595968, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 823296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4031488, - "linear_dense_total": 4718592, - "linear_nnz": 4629504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 930816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4107264, - "linear_dense_total": 4718592, - "linear_nnz": 5038080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 824320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3997696, - "linear_dense_total": 4718592, - "linear_nnz": 4822016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 746496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4027392, - "linear_dense_total": 4718592, - "linear_nnz": 4773888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 670720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3737600, - "linear_dense_total": 4718592, - "linear_nnz": 4408320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 794624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2995200, - "linear_dense_total": 4718592, - "linear_nnz": 3789824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1756160, - "linear_dense_total": 4718592, - "linear_nnz": 2176000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 600064, - "linear_dense_total": 4718592, - "linear_nnz": 1011712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40469504, - "linear_sparsity": 52.35218942901234, - "linear_total": 84934656, - "nnz": 64400930, - "total": 108893186, - "total_sparsity": 40.85862268737366 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.22421948912014, - "f1": 87.0664817371684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 24.675214500427245, - "eval_elapsed_time": 31.986000607255846 - }, - "speedup": 1.5640955422982379, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 501760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3380224, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 528384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3656704, - "linear_dense_total": 4718592, - "linear_nnz": 4185088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 561152, - "linear_dense_total": 4718592, - "linear_nnz": 874496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 200704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 617472, - "linear_dense_total": 4718592, - "linear_nnz": 818176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4121600, - "linear_dense_total": 4718592, - "linear_nnz": 4703232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 916480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4144128, - "linear_dense_total": 4718592, - "linear_nnz": 5060608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 833536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4060160, - "linear_dense_total": 4718592, - "linear_nnz": 4893696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 741376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076544, - "linear_dense_total": 4718592, - "linear_nnz": 4817920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 644096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3815424, - "linear_dense_total": 4718592, - "linear_nnz": 4459520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 757760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2962432, - "linear_dense_total": 4718592, - "linear_nnz": 3720192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 380928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689600, - "linear_dense_total": 4718592, - "linear_nnz": 2070528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 571392, - "linear_dense_total": 4718592, - "linear_nnz": 966656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 40452096, - "linear_sparsity": 52.37268518518518, - "linear_total": 84934656, - "nnz": 64383586, - "total": 108893186, - "total_sparsity": 40.874550222086434 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-65000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.67833491012298, - "f1": 87.14623278516426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 25.61453672027588, - "eval_elapsed_time": 32.96429116372019 - }, - "speedup": 1.5067378897710322, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 571392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3765248, - "linear_dense_total": 4718592, - "linear_nnz": 4336640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3852288, - "linear_dense_total": 4718592, - "linear_nnz": 4451328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 672768, - "linear_dense_total": 4718592, - "linear_nnz": 1047552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 235520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 942080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 695296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4087808, - "linear_dense_total": 4718592, - "linear_nnz": 4783104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 996352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4050944, - "linear_dense_total": 4718592, - "linear_nnz": 5047296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 923648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4109312, - "linear_dense_total": 4718592, - "linear_nnz": 5032960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 865280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4041728, - "linear_dense_total": 4718592, - "linear_nnz": 4907008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 778240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3858432, - "linear_dense_total": 4718592, - "linear_nnz": 4636672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 883712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3359744, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2305024, - "linear_dense_total": 4718592, - "linear_nnz": 2818048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 462848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 1289216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43535360, - "linear_sparsity": 48.742525077160494, - "linear_total": 84934656, - "nnz": 67469538, - "total": 108893186, - "total_sparsity": 38.04062450702838 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.94985808893094, - "f1": 86.768721062838 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 21.874919250488283, - "eval_elapsed_time": 29.121937923133373 - }, - "speedup": 1.7643216216448254, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1844224, - "linear_dense_total": 4718592, - "linear_nnz": 2392064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2172928, - "linear_dense_total": 4718592, - "linear_nnz": 2719744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 392192, - "linear_dense_total": 4718592, - "linear_nnz": 748544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 434176, - "linear_dense_total": 4718592, - "linear_nnz": 651264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3196928, - "linear_dense_total": 4718592, - "linear_nnz": 3872768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 965632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3111936, - "linear_dense_total": 4718592, - "linear_nnz": 4077568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 896000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3107840, - "linear_dense_total": 4718592, - "linear_nnz": 4003840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3136512, - "linear_dense_total": 4718592, - "linear_nnz": 3832832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2525184, - "linear_dense_total": 4718592, - "linear_nnz": 3280896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 799744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1711104, - "linear_dense_total": 4718592, - "linear_nnz": 2510848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 509952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 747520, - "linear_dense_total": 4718592, - "linear_nnz": 1257472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 420864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 261120, - "linear_dense_total": 4718592, - "linear_nnz": 681984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30029824, - "linear_sparsity": 64.6436149691358, - "linear_total": 84934656, - "nnz": 53955042, - "total": 108893186, - "total_sparsity": 50.45140657377771 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.8713339640492, - "f1": 85.84893170709621 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.24458102798462, - "eval_elapsed_time": 26.45731420116499 - }, - "speedup": 2.0054680821187447, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 647168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 789504, - "linear_dense_total": 4718592, - "linear_nnz": 1436672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 591872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1206272, - "linear_dense_total": 4718592, - "linear_nnz": 1798144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 263168, - "linear_dense_total": 4718592, - "linear_nnz": 622592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 271360, - "linear_dense_total": 4718592, - "linear_nnz": 512000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 843776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1739776, - "linear_dense_total": 4718592, - "linear_nnz": 2583552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1118208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1857536, - "linear_dense_total": 4718592, - "linear_nnz": 2975744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1760256, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 791552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718272, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1330176, - "linear_dense_total": 4718592, - "linear_nnz": 2085888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904192, - "linear_dense_total": 4718592, - "linear_nnz": 1731584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 726016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 257024, - "linear_dense_total": 4718592, - "linear_nnz": 983040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 464896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118784, - "linear_dense_total": 4718592, - "linear_nnz": 583680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20496384, - "linear_sparsity": 75.86805555555556, - "linear_total": 84934656, - "nnz": 44413282, - "total": 108893186, - "total_sparsity": 59.21390159343854 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl20_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.92809839167455, - "f1": 85.97854187426412 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20 - }, - "speed": { - "cuda_eval_elapsed_time": 19.635457836151122, - "eval_elapsed_time": 26.92565976222977 - }, - "speedup": 1.9655458674518098, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 679936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 869376, - "linear_dense_total": 4718592, - "linear_nnz": 1549312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269760, - "linear_dense_total": 4718592, - "linear_nnz": 1868800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 379904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 282624, - "linear_dense_total": 4718592, - "linear_nnz": 662528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 258048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290816, - "linear_dense_total": 4718592, - "linear_nnz": 548864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 875520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1863680, - "linear_dense_total": 4718592, - "linear_nnz": 2739200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1137664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1950720, - "linear_dense_total": 4718592, - "linear_nnz": 3088384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1033216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1787904, - "linear_dense_total": 4718592, - "linear_nnz": 2821120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 850944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1858560, - "linear_dense_total": 4718592, - "linear_nnz": 2709504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 798720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1426432, - "linear_dense_total": 4718592, - "linear_nnz": 2225152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 878592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 987136, - "linear_dense_total": 4718592, - "linear_nnz": 1865728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 782336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 267264, - "linear_dense_total": 4718592, - "linear_nnz": 1049600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 649216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21777408, - "linear_sparsity": 74.35980902777779, - "linear_total": 84934656, - "nnz": 45695714, - "total": 108893186, - "total_sparsity": 58.036204395746125 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.79280983916746, - "f1": 85.3167029862563 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.872496753692626, - "eval_elapsed_time": 24.01387820020318 - }, - "speedup": 2.2874144573134694, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 512000, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 551936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 197632, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 197632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 220160, - "linear_dense_total": 4718592, - "linear_nnz": 417792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 722944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1211392, - "linear_dense_total": 4718592, - "linear_nnz": 1934336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 954368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1397760, - "linear_dense_total": 4718592, - "linear_nnz": 2352128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1238016, - "linear_dense_total": 4718592, - "linear_nnz": 2028544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 584704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1295360, - "linear_dense_total": 4718592, - "linear_nnz": 1880064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 608256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1018880, - "linear_dense_total": 4718592, - "linear_nnz": 1627136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 576512, - "linear_dense_total": 4718592, - "linear_nnz": 1316864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 510976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 673792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 94208, - "linear_dense_total": 4718592, - "linear_nnz": 451584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15444992, - "linear_sparsity": 81.81544174382715, - "linear_total": 84934656, - "nnz": 39356610, - "total": 108893186, - "total_sparsity": 63.85760078688487 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl30_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.04824976348155, - "f1": 85.17930403802184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30 - }, - "speed": { - "cuda_eval_elapsed_time": 16.85802384185791, - "eval_elapsed_time": 24.0219326200895 - }, - "speedup": 2.289378243109522, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 513024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 519168, - "linear_dense_total": 4718592, - "linear_nnz": 1032192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 692224, - "linear_dense_total": 4718592, - "linear_nnz": 1215488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 206848, - "linear_dense_total": 4718592, - "linear_nnz": 519168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 186368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 401408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 683008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1239040, - "linear_dense_total": 4718592, - "linear_nnz": 1922048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 945152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1374208, - "linear_dense_total": 4718592, - "linear_nnz": 2319360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1235968, - "linear_dense_total": 4718592, - "linear_nnz": 2045952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 581632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1265664, - "linear_dense_total": 4718592, - "linear_nnz": 1847296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1607680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 708608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 578560, - "linear_dense_total": 4718592, - "linear_nnz": 1287168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 473088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158720, - "linear_dense_total": 4718592, - "linear_nnz": 631808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 352256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 90112, - "linear_dense_total": 4718592, - "linear_nnz": 442368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15271936, - "linear_sparsity": 82.0191936728395, - "linear_total": 84934656, - "nnz": 39183362, - "total": 108893186, - "total_sparsity": 64.01669981444019 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.70104068117313, - "f1": 85.88451743537976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 20.68525614929199, - "eval_elapsed_time": 27.97377561684698 - }, - "speedup": 1.8657923656745288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 413696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2119680, - "linear_dense_total": 4718592, - "linear_nnz": 2533376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2476032, - "linear_dense_total": 4718592, - "linear_nnz": 2840576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386048, - "linear_dense_total": 4718592, - "linear_nnz": 623616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 460800, - "linear_dense_total": 4718592, - "linear_nnz": 605184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3445760, - "linear_dense_total": 4718592, - "linear_nnz": 3843072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 666624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3402752, - "linear_dense_total": 4718592, - "linear_nnz": 4069376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 492544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3339264, - "linear_dense_total": 4718592, - "linear_nnz": 3831808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194880, - "linear_dense_total": 4718592, - "linear_nnz": 3714048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 448512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2751488, - "linear_dense_total": 4718592, - "linear_nnz": 3200000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 576512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1839104, - "linear_dense_total": 4718592, - "linear_nnz": 2415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 940032, - "linear_dense_total": 4718592, - "linear_nnz": 1211392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 302080, - "linear_dense_total": 4718592, - "linear_nnz": 619520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29507584, - "linear_sparsity": 65.25848765432099, - "linear_total": 84934656, - "nnz": 53430466, - "total": 108893186, - "total_sparsity": 50.93314103235074 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-85000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.68211920529801, - "f1": 86.11161494070976 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.577418830871583, - "eval_elapsed_time": 28.903804030269384 - }, - "speedup": 1.7886473497076825, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 459776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2385920, - "linear_dense_total": 4718592, - "linear_nnz": 2845696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 374784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2798592, - "linear_dense_total": 4718592, - "linear_nnz": 3173376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 254976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 416768, - "linear_dense_total": 4718592, - "linear_nnz": 671744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 165888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 466944, - "linear_dense_total": 4718592, - "linear_nnz": 632832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 411648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3454976, - "linear_dense_total": 4718592, - "linear_nnz": 3866624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3496960, - "linear_dense_total": 4718592, - "linear_nnz": 4224000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 541696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3412992, - "linear_dense_total": 4718592, - "linear_nnz": 3954688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 545792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3447808, - "linear_dense_total": 4718592, - "linear_nnz": 3993600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933760, - "linear_dense_total": 4718592, - "linear_nnz": 3427328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 641024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2000896, - "linear_dense_total": 4718592, - "linear_nnz": 2641920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 288768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1004544, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 339968, - "linear_dense_total": 4718592, - "linear_nnz": 678912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 31404032, - "linear_sparsity": 63.025655864197525, - "linear_total": 84934656, - "nnz": 55329122, - "total": 108893186, - "total_sparsity": 49.1895461668281 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.96594134342479, - "f1": 86.01491496793933 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 21.28239717102051, - "eval_elapsed_time": 28.641465611290187 - }, - "speedup": 1.8134420053923117, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 435200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2226176, - "linear_dense_total": 4718592, - "linear_nnz": 2661376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2727936, - "linear_dense_total": 4718592, - "linear_nnz": 3087360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 252928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 664576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 158720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 487424, - "linear_dense_total": 4718592, - "linear_nnz": 646144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 421888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3473408, - "linear_dense_total": 4718592, - "linear_nnz": 3895296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 710656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3451904, - "linear_dense_total": 4718592, - "linear_nnz": 4162560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 547840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3437568, - "linear_dense_total": 4718592, - "linear_nnz": 3985408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 556032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3325952, - "linear_dense_total": 4718592, - "linear_nnz": 3881984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 512000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2828288, - "linear_dense_total": 4718592, - "linear_nnz": 3340288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1991680, - "linear_dense_total": 4718592, - "linear_nnz": 2614272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 276480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 979968, - "linear_dense_total": 4718592, - "linear_nnz": 1256448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 337920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330752, - "linear_dense_total": 4718592, - "linear_nnz": 668672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30864384, - "linear_sparsity": 63.66102430555556, - "linear_total": 84934656, - "nnz": 54788706, - "total": 108893186, - "total_sparsity": 49.68582699012958 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.92526017029329, - "f1": 85.21713644985097 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.63341423416138, - "eval_elapsed_time": 24.82955563813448 - }, - "speedup": 2.1887078981336363, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1026048, - "linear_dense_total": 4718592, - "linear_nnz": 1469440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 396288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1296384, - "linear_dense_total": 4718592, - "linear_nnz": 1692672, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 237568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 308224, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 152576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 315392, - "linear_dense_total": 4718592, - "linear_nnz": 467968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2113536, - "linear_dense_total": 4718592, - "linear_nnz": 2692096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 755712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1973248, - "linear_dense_total": 4718592, - "linear_nnz": 2728960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 565248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966080, - "linear_dense_total": 4718592, - "linear_nnz": 2531328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1887232, - "linear_dense_total": 4718592, - "linear_nnz": 2434048, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 476160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1502208, - "linear_dense_total": 4718592, - "linear_nnz": 1978368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 637952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000448, - "linear_dense_total": 4718592, - "linear_nnz": 1638400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 310272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 310272, - "linear_dense_total": 4718592, - "linear_nnz": 620544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 313344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 144384, - "linear_dense_total": 4718592, - "linear_nnz": 457728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19257344, - "linear_sparsity": 77.3268711419753, - "linear_total": 84934656, - "nnz": 43172098, - "total": 108893186, - "total_sparsity": 60.35371946964616 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.08609271523179, - "f1": 85.20287591064626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.564620765686037, - "eval_elapsed_time": 24.740368818864226 - }, - "speedup": 2.1972801758844964, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1007616, - "linear_dense_total": 4718592, - "linear_nnz": 1463296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 399360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1300480, - "linear_dense_total": 4718592, - "linear_nnz": 1699840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 240640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305152, - "linear_dense_total": 4718592, - "linear_nnz": 545792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 144384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 329728, - "linear_dense_total": 4718592, - "linear_nnz": 474112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 544768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2180096, - "linear_dense_total": 4718592, - "linear_nnz": 2724864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 731136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1939456, - "linear_dense_total": 4718592, - "linear_nnz": 2670592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 557056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1941504, - "linear_dense_total": 4718592, - "linear_nnz": 2498560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 527360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1880064, - "linear_dense_total": 4718592, - "linear_nnz": 2407424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 472064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1456128, - "linear_dense_total": 4718592, - "linear_nnz": 1928192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 607232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 977920, - "linear_dense_total": 4718592, - "linear_nnz": 1585152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 289792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 317440, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 308224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 455680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19060736, - "linear_sparsity": 77.55835262345678, - "linear_total": 84934656, - "nnz": 42975330, - "total": 108893186, - "total_sparsity": 60.53441764482857 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.01986754966887, - "f1": 85.2617013700351 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 18.277880432128907, - "eval_elapsed_time": 25.53750513214618 - }, - "speedup": 2.1115354785629177, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 480256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1234944, - "linear_dense_total": 4718592, - "linear_nnz": 1715200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 400384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495040, - "linear_dense_total": 4718592, - "linear_nnz": 1895424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 267264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 593920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 163840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 501760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 594944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2417664, - "linear_dense_total": 4718592, - "linear_nnz": 3012608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 813056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281472, - "linear_dense_total": 4718592, - "linear_nnz": 3094528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 599040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2163712, - "linear_dense_total": 4718592, - "linear_nnz": 2762752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 562176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2145280, - "linear_dense_total": 4718592, - "linear_nnz": 2707456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 531456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701888, - "linear_dense_total": 4718592, - "linear_nnz": 2233344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 678912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1062912, - "linear_dense_total": 4718592, - "linear_nnz": 1741824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 338944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370688, - "linear_dense_total": 4718592, - "linear_nnz": 709632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 359424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164864, - "linear_dense_total": 4718592, - "linear_nnz": 524288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21492736, - "linear_sparsity": 74.6949749228395, - "linear_total": 84934656, - "nnz": 45409666, - "total": 108893186, - "total_sparsity": 58.29889117212532 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.5_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.98202459791864, - "f1": 85.22056943761015 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 17.92396342086792, - "eval_elapsed_time": 25.119796799961478 - }, - "speedup": 2.153228730674472, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 458752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1139712, - "linear_dense_total": 4718592, - "linear_nnz": 1598464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 398336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1427456, - "linear_dense_total": 4718592, - "linear_nnz": 1825792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 271360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326656, - "linear_dense_total": 4718592, - "linear_nnz": 598016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 347136, - "linear_dense_total": 4718592, - "linear_nnz": 509952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 596992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2257920, - "linear_dense_total": 4718592, - "linear_nnz": 2854912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 781312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2123776, - "linear_dense_total": 4718592, - "linear_nnz": 2905088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 620544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2023424, - "linear_dense_total": 4718592, - "linear_nnz": 2643968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 573440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970176, - "linear_dense_total": 4718592, - "linear_nnz": 2543616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 460800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1588224, - "linear_dense_total": 4718592, - "linear_nnz": 2049024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1069056, - "linear_dense_total": 4718592, - "linear_nnz": 1708032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 307200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 359424, - "linear_dense_total": 4718592, - "linear_nnz": 666624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 327680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 161792, - "linear_dense_total": 4718592, - "linear_nnz": 489472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20392960, - "linear_sparsity": 75.98982445987654, - "linear_total": 84934656, - "nnz": 44308674, - "total": 108893186, - "total_sparsity": 59.309966373837206 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.51371807000946, - "f1": 88.67903677006836 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.30978426361084, - "eval_elapsed_time": 38.71227815328166 - }, - "speedup": 1.232662374177603, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 804864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5262336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 771072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4464640, - "linear_dense_total": 4718592, - "linear_nnz": 5235712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607680, - "linear_dense_total": 4718592, - "linear_nnz": 2222080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 389120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1230848, - "linear_dense_total": 4718592, - "linear_nnz": 1619968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1152000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4517888, - "linear_dense_total": 4718592, - "linear_nnz": 5669888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1312768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4562944, - "linear_dense_total": 4718592, - "linear_nnz": 5875712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1501184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4555776, - "linear_dense_total": 4718592, - "linear_nnz": 6056960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1377280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4520960, - "linear_dense_total": 4718592, - "linear_nnz": 5898240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1357824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4473856, - "linear_dense_total": 4718592, - "linear_nnz": 5831680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1192960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4290560, - "linear_dense_total": 4718592, - "linear_nnz": 5483520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3889152, - "linear_dense_total": 4718592, - "linear_nnz": 4958208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 718848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2518016, - "linear_dense_total": 4718592, - "linear_nnz": 3236864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57351168, - "linear_sparsity": 32.47612847222222, - "linear_total": 84934656, - "nnz": 81295202, - "total": 108893186, - "total_sparsity": 25.344087186502197 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_stl50_est5000_rn-output__squad_test3_dpm-sigmoied_threshold_apme-sigmoied_threshold_aowd0_bm1_dbr32_dbc32_abr32_abc32_it0_fw10_r-l1_rfl5_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-80000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.73698799207777 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 32, - "dense_block_rows": 32, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5 - }, - "speed": { - "cuda_eval_elapsed_time": 31.817585739135744, - "eval_elapsed_time": 39.2419764213264 - }, - "speedup": 1.2129893613486789, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 921600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4316160, - "linear_dense_total": 4718592, - "linear_nnz": 5237760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 829440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5140480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 671744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2001920, - "linear_dense_total": 4718592, - "linear_nnz": 2673664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 409600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1304576, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1221632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4419584, - "linear_dense_total": 4718592, - "linear_nnz": 5641216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1386496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4429824, - "linear_dense_total": 4718592, - "linear_nnz": 5816320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1540096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4457472, - "linear_dense_total": 4718592, - "linear_nnz": 5997568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1548288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4420608, - "linear_dense_total": 4718592, - "linear_nnz": 5968896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1364992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4320256, - "linear_dense_total": 4718592, - "linear_nnz": 5685248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1272832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4186112, - "linear_dense_total": 4718592, - "linear_nnz": 5458944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1173504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3787776, - "linear_dense_total": 4718592, - "linear_nnz": 4961280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 727040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2839552, - "linear_dense_total": 4718592, - "linear_nnz": 3566592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 57862144, - "linear_sparsity": 31.87451774691358, - "linear_total": 84934656, - "nnz": 81807426, - "total": 108893186, - "total_sparsity": 24.873695953757846 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test3", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test3", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test3", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.66887417218543, - "f1": 87.3881230572442 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.326403350830077, - "eval_elapsed_time": 24.523588876239955 - }, - "speedup": 2.227490161916501, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 643072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 622592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1539584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 463872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 576000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 591872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1051648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2068480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1257472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2334208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1315840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 2473984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2078208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1004544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 1820160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 925696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 1555456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 899072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1236992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 663040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18215424, - "linear_sparsity": 78.55360243055556, - "linear_total": 84934656, - "nnz": 42128141, - "total": 108893186, - "total_sparsity": 61.31241765669342 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.14755939306319 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.057066314697266, - "eval_elapsed_time": 24.182081679347903 - }, - "speedup": 2.262663009764823, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1107968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1204736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 478208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 551936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 312320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 472064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1098752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 1717760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1309696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 1967104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1362944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2067968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1074176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 1742336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1049600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 1565696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 958464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1342464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 949248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1153536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 729088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15622656, - "linear_sparsity": 81.6062644675926, - "linear_total": 84934656, - "nnz": 39533983, - "total": 108893186, - "total_sparsity": 63.694713643514845 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl2--2021-01-21--00-53-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.67549668874172, - "f1": 86.51098653495667 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.252509830474853, - "eval_elapsed_time": 24.480217491276562 - }, - "speedup": 2.2370306340702912, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 864256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 991744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 216576, - "linear_dense_total": 4718592, - "linear_nnz": 965120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 502784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 53760, - "linear_dense_total": 4718592, - "linear_nnz": 556544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 360448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 81408, - "linear_dense_total": 4718592, - "linear_nnz": 441856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 324096, - "linear_dense_total": 4718592, - "linear_nnz": 1487360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1389568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377856, - "linear_dense_total": 4718592, - "linear_nnz": 1767424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1449984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 414720, - "linear_dense_total": 4718592, - "linear_nnz": 1864704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1349632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 364032, - "linear_dense_total": 4718592, - "linear_nnz": 1713664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1187840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1481216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 964608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 225792, - "linear_dense_total": 4718592, - "linear_nnz": 1190400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1063936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 127488, - "linear_dense_total": 4718592, - "linear_nnz": 1191424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 650240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 708608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 14360064, - "linear_sparsity": 83.0928096064815, - "linear_total": 84934656, - "nnz": 38271273, - "total": 108893186, - "total_sparsity": 64.85429951512302 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.3349101229896, - "f1": 86.4116267700138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.760263885498047, - "eval_elapsed_time": 21.897933847736567 - }, - "speedup": 2.6147495264830645, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 522240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 933888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 523264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1116160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 367616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 222208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 421888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 675840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1374720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1692160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 825344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1659392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 672768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 638976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1207296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 785408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1235456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 514048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12816896, - "linear_sparsity": 84.9096981095679, - "linear_total": 84934656, - "nnz": 36724619, - "total": 108893186, - "total_sparsity": 66.2746399944621 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.37275307473983, - "f1": 86.39441106336629 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.746898548126222, - "eval_elapsed_time": 21.86237431317568 - }, - "speedup": 2.61711931355729, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 519168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 930816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 536576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1129472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 356352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 443904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 226304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 425984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 667648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1366528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 967680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 1681920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 835584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 1669632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 668672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1412096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 653312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1221632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 787456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1237504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 493568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 757760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 526336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 12803584, - "linear_sparsity": 84.92537133487654, - "linear_total": 84934656, - "nnz": 36711275, - "total": 108893186, - "total_sparsity": 66.28689420474849 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.8240302743614, - "f1": 86.11992485005756 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.268565601348877, - "eval_elapsed_time": 21.374552259687334 - }, - "speedup": 2.704854439028025, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 732160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 535552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 835072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 422912, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 239616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 336384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1128960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1111040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 1551872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 892928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1389056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 663552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 662528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1000448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 801792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 645120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 803328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 424960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 498688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10866176, - "linear_sparsity": 87.20642843364197, - "linear_total": 84934656, - "nnz": 34772839, - "total": 108893186, - "total_sparsity": 68.06702028169144 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl2--2021-01-21--00-54-43/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.9914853358562, - "f1": 85.26341062121247 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.846498733520509, - "eval_elapsed_time": 21.962527931667864 - }, - "speedup": 2.599561936999493, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 598016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 674816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 621568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 129024, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 395264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 36864, - "linear_dense_total": 4718592, - "linear_nnz": 432128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 238592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 52224, - "linear_dense_total": 4718592, - "linear_nnz": 290816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 937984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 1137664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1193984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1458176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1057792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 278016, - "linear_dense_total": 4718592, - "linear_nnz": 1335808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 614400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 843264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 188928, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 830464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 148992, - "linear_dense_total": 4718592, - "linear_nnz": 979456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 753664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 79872, - "linear_dense_total": 4718592, - "linear_nnz": 833536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 46080, - "linear_dense_total": 4718592, - "linear_nnz": 478208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10163200, - "linear_sparsity": 88.03409529320987, - "linear_total": 84934656, - "nnz": 34069864, - "total": 108893186, - "total_sparsity": 68.71258409134985 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-25--2021-01-23--20-20-19/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.06054872280038, - "f1": 86.20063710644014 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.394198833465577, - "eval_elapsed_time": 21.72890411503613 - }, - "speedup": 2.681246344578876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 455680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 597504, - "linear_dense_total": 4718592, - "linear_nnz": 1053184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 364544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 854016, - "linear_dense_total": 4718592, - "linear_nnz": 1218560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 286720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118272, - "linear_dense_total": 4718592, - "linear_nnz": 404992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 162816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276480, - "linear_dense_total": 4718592, - "linear_nnz": 439296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 529408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 973824, - "linear_dense_total": 4718592, - "linear_nnz": 1503232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 964608, - "linear_dense_total": 4718592, - "linear_nnz": 1714176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1047552, - "linear_dense_total": 4718592, - "linear_nnz": 1626112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 600064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 992256, - "linear_dense_total": 4718592, - "linear_nnz": 1592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 546816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 775680, - "linear_dense_total": 4718592, - "linear_nnz": 1322496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615936, - "linear_dense_total": 4718592, - "linear_nnz": 1302016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 342528, - "linear_dense_total": 4718592, - "linear_nnz": 678400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 358400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 135168, - "linear_dense_total": 4718592, - "linear_nnz": 493568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 13348352, - "linear_sparsity": 84.28397472993827, - "linear_total": 84934656, - "nnz": 37255475, - "total": 108893186, - "total_sparsity": 65.78713841653968 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.11447492904446, - "f1": 85.59611837921153 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.499527885437011, - "eval_elapsed_time": 20.856850353069603 - }, - "speedup": 2.8589439077351635, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 432128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 722432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 489472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 948736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 309248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 382976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 198656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 352256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1136128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 779264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1362944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 575488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1240576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 590848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1054720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 696320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1066496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 388096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 623104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 363520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 452608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10498048, - "linear_sparsity": 87.63985339506173, - "linear_total": 84934656, - "nnz": 34403512, - "total": 108893186, - "total_sparsity": 68.40618475429675 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l30-dl0-5--2021-01-23--20-19-50/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.37937559129612, - "f1": 85.69020560735045 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 30.0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.492438529968261, - "eval_elapsed_time": 20.86975116888061 - }, - "speedup": 2.860446087610368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 290304, - "linear_dense_total": 4718592, - "linear_nnz": 741888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 495616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 459264, - "linear_dense_total": 4718592, - "linear_nnz": 954880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 296960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 370688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 194560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153600, - "linear_dense_total": 4718592, - "linear_nnz": 348160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 583680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 557568, - "linear_dense_total": 4718592, - "linear_nnz": 1141248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 789504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 583680, - "linear_dense_total": 4718592, - "linear_nnz": 1373184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 582656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 665088, - "linear_dense_total": 4718592, - "linear_nnz": 1247744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 614400, - "linear_dense_total": 4718592, - "linear_nnz": 1163264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463872, - "linear_dense_total": 4718592, - "linear_nnz": 1042432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 715776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 370176, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 375808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 235008, - "linear_dense_total": 4718592, - "linear_nnz": 610816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 347136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 89088, - "linear_dense_total": 4718592, - "linear_nnz": 436224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 10516480, - "linear_sparsity": 87.61815200617285, - "linear_total": 84934656, - "nnz": 34421912, - "total": 108893186, - "total_sparsity": 68.3892874619354 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.48249763481552, - "f1": 88.07285498416482 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.669778549194337, - "eval_elapsed_time": 27.982159624807537 - }, - "speedup": 1.8671894773093938, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 991232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2098688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 730112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 624640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 787456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 897536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1225728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2731008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1433600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2977280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1566720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3176448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3081216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2487808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1166336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2003456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1148928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1617408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 738304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 945664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24807424, - "linear_sparsity": 70.79234182098766, - "linear_total": 84934656, - "nnz": 48725622, - "total": 108893186, - "total_sparsity": 55.25374562922606 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.70009460737937, - "f1": 88.04831949879843 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.71169916152954, - "eval_elapsed_time": 28.054355942178518 - }, - "speedup": 1.863410273796239, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1107456, - "linear_dense_total": 4718592, - "linear_nnz": 2086400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 721920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1273344, - "linear_dense_total": 4718592, - "linear_nnz": 1995264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 615424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 162816, - "linear_dense_total": 4718592, - "linear_nnz": 778240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 403456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 477696, - "linear_dense_total": 4718592, - "linear_nnz": 881152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1232896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1505280, - "linear_dense_total": 4718592, - "linear_nnz": 2738176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1455104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1543680, - "linear_dense_total": 4718592, - "linear_nnz": 2998784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1598464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1609728, - "linear_dense_total": 4718592, - "linear_nnz": 3208192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1508352, - "linear_dense_total": 4718592, - "linear_nnz": 3104768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1373184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2499072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1165312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 837120, - "linear_dense_total": 4718592, - "linear_nnz": 2002432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1163264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 468480, - "linear_dense_total": 4718592, - "linear_nnz": 1631744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 740352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 207360, - "linear_dense_total": 4718592, - "linear_nnz": 947712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 24871936, - "linear_sparsity": 70.71638695987654, - "linear_total": 84934656, - "nnz": 48790134, - "total": 108893186, - "total_sparsity": 55.19450225287742 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.79470198675497, - "f1": 88.10958975740277 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.5, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.632953029632567, - "eval_elapsed_time": 27.97396031860262 - }, - "speedup": 1.8705220212512832, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 976896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2102784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 733184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2018816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 636928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 805888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 904192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1252352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 2776064, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1437696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 2993664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1545216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3162624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1574912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3089408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1370112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2505216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2026496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1190912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 1665536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 748544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 957440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25008128, - "linear_sparsity": 70.55603780864197, - "linear_total": 84934656, - "nnz": 48926434, - "total": 108893186, - "total_sparsity": 55.069333723048565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.58656575212866, - "f1": 88.06903108265608 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.595643711090087, - "eval_elapsed_time": 26.718373194802552 - }, - "speedup": 1.9695394330694393, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1055744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1582592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 809984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1562624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 652288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 750592, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 419840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 682496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1316864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2190848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1468416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1651712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 2697728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1616896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2603008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1361920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2102272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1265664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 1824768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1212416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 749568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 863232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 20786688, - "linear_sparsity": 75.52625868055556, - "linear_total": 84934656, - "nnz": 44702229, - "total": 108893186, - "total_sparsity": 58.94855257518133 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl2--2021-01-21--00-51-49/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.10406811731315, - "f1": 87.56487698206614 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 2.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.459814723968506, - "eval_elapsed_time": 26.6199238197878 - }, - "speedup": 1.9832867657180042, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1210368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 210432, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 977920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 403968, - "linear_dense_total": 4718592, - "linear_nnz": 1381888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 712704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 69120, - "linear_dense_total": 4718592, - "linear_nnz": 781824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 443392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 136704, - "linear_dense_total": 4718592, - "linear_nnz": 580096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1500160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 513024, - "linear_dense_total": 4718592, - "linear_nnz": 2013184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1526784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 588288, - "linear_dense_total": 4718592, - "linear_nnz": 2115072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1734656, - "linear_attention_total": 2359296, - "linear_dense_nnz": 660480, - "linear_dense_total": 4718592, - "linear_nnz": 2395136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1659904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 551424, - "linear_dense_total": 4718592, - "linear_nnz": 2211328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1486848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 456192, - "linear_dense_total": 4718592, - "linear_nnz": 1943040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1254400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 336384, - "linear_dense_total": 4718592, - "linear_nnz": 1590784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1267712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173568, - "linear_dense_total": 4718592, - "linear_nnz": 1441280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 760832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 76800, - "linear_dense_total": 4718592, - "linear_nnz": 837632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18712064, - "linear_sparsity": 77.96887056327161, - "linear_total": 84934656, - "nnz": 42626625, - "total": 108893186, - "total_sparsity": 60.85464429335368 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a16-l10-dl1--2021-01-24--15-45-00/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.00946073793756, - "f1": 87.65780769915727 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.317300163269042, - "eval_elapsed_time": 33.56822411296889 - }, - "speedup": 1.4665027478478643, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 720896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1657600, - "linear_dense_total": 4718592, - "linear_nnz": 2378496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 719872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2046464, - "linear_dense_total": 4718592, - "linear_nnz": 2766336, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 450560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 272128, - "linear_dense_total": 4718592, - "linear_nnz": 722688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 307456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 311808, - "linear_dense_total": 4718592, - "linear_nnz": 619264, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1058304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2721792, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1227776, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2707200, - "linear_dense_total": 4718592, - "linear_nnz": 3934976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1367808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2789888, - "linear_dense_total": 4718592, - "linear_nnz": 4157696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1258240, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2672384, - "linear_dense_total": 4718592, - "linear_nnz": 3930624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1130496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2136064, - "linear_dense_total": 4718592, - "linear_nnz": 3266560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 988928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491200, - "linear_dense_total": 4718592, - "linear_nnz": 2480128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 888576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 653568, - "linear_dense_total": 4718592, - "linear_nnz": 1542144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 567296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 249088, - "linear_dense_total": 4718592, - "linear_nnz": 816384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30395392, - "linear_sparsity": 64.21320408950618, - "linear_total": 84934656, - "nnz": 54326914, - "total": 108893186, - "total_sparsity": 50.10990494850615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l10-dl1--2021-01-24--15-47-42/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.9271523178808, - "f1": 88.21768668110452 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 33.44704815673828, - "eval_elapsed_time": 40.718972705770284 - }, - "speedup": 1.1538953400165994, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 528912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2409360, - "linear_dense_total": 4718592, - "linear_nnz": 2938272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 618448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2534112, - "linear_dense_total": 4718592, - "linear_nnz": 3152560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 357616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 309216, - "linear_dense_total": 4718592, - "linear_nnz": 666832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 219536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 276672, - "linear_dense_total": 4718592, - "linear_nnz": 496208, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 835904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670704, - "linear_dense_total": 4718592, - "linear_nnz": 3506608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 958400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2670800, - "linear_dense_total": 4718592, - "linear_nnz": 3629200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1091248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2620432, - "linear_dense_total": 4718592, - "linear_nnz": 3711680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1029984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2535968, - "linear_dense_total": 4718592, - "linear_nnz": 3565952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 964544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2286960, - "linear_dense_total": 4718592, - "linear_nnz": 3251504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 813552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1727488, - "linear_dense_total": 4718592, - "linear_nnz": 2541040, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 744336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1096768, - "linear_dense_total": 4718592, - "linear_nnz": 1841104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 386800, - "linear_dense_total": 4718592, - "linear_nnz": 860464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30161424, - "linear_sparsity": 64.48867232711225, - "linear_total": 84934656, - "nnz": 54106194, - "total": 108893186, - "total_sparsity": 50.31259899035372 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.72563859981078, - "f1": 87.37325813950282 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.62903995513916, - "eval_elapsed_time": 37.18844554480165 - }, - "speedup": 1.302586687378539, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492736, - "linear_dense_total": 4718592, - "linear_nnz": 1861936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640864, - "linear_dense_total": 4718592, - "linear_nnz": 2108384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173136, - "linear_dense_total": 4718592, - "linear_nnz": 415888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168992, - "linear_dense_total": 4718592, - "linear_nnz": 326432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 642896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1775952, - "linear_dense_total": 4718592, - "linear_nnz": 2418848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758400, - "linear_dense_total": 4718592, - "linear_nnz": 2503152, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1673184, - "linear_dense_total": 4718592, - "linear_nnz": 2447312, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581904, - "linear_dense_total": 4718592, - "linear_nnz": 2218640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321456, - "linear_dense_total": 4718592, - "linear_nnz": 1927200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1454768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524512, - "linear_dense_total": 4718592, - "linear_nnz": 1011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 307184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180400, - "linear_dense_total": 4718592, - "linear_nnz": 487584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19181376, - "linear_sparsity": 77.41631401909721, - "linear_total": 84934656, - "nnz": 43119238, - "total": 108893186, - "total_sparsity": 60.40226245194075 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l20-dl1--2021-01-24--15-48-09/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.77294228949859, - "f1": 87.35885990249378 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.662232711791994, - "eval_elapsed_time": 37.211166836321354 - }, - "speedup": 1.3011290613342195, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 369024, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492400, - "linear_dense_total": 4718592, - "linear_nnz": 1861424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640528, - "linear_dense_total": 4718592, - "linear_nnz": 2107600, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 242352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 173264, - "linear_dense_total": 4718592, - "linear_nnz": 415616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 157280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 168800, - "linear_dense_total": 4718592, - "linear_nnz": 326080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 643248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1776032, - "linear_dense_total": 4718592, - "linear_nnz": 2419280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 744560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1758000, - "linear_dense_total": 4718592, - "linear_nnz": 2502560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 773760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1672784, - "linear_dense_total": 4718592, - "linear_nnz": 2446544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 636208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1581568, - "linear_dense_total": 4718592, - "linear_nnz": 2217776, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 605664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1321040, - "linear_dense_total": 4718592, - "linear_nnz": 1926704, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 548160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906384, - "linear_dense_total": 4718592, - "linear_nnz": 1454544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 486464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 524352, - "linear_dense_total": 4718592, - "linear_nnz": 1010816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 306864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 180544, - "linear_dense_total": 4718592, - "linear_nnz": 487408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19176352, - "linear_sparsity": 77.42222915461035, - "linear_total": 84934656, - "nnz": 43114218, - "total": 108893186, - "total_sparsity": 60.40687247409585 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.84295175023652, - "f1": 85.93146728512978 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.637864067077636, - "eval_elapsed_time": 32.05906807305291 - }, - "speedup": 1.5664666750452154, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 246400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 777312, - "linear_dense_total": 4718592, - "linear_nnz": 1023712, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 904544, - "linear_dense_total": 4718592, - "linear_nnz": 1286416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 169216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118720, - "linear_dense_total": 4718592, - "linear_nnz": 287936, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110384, - "linear_dense_total": 4718592, - "linear_nnz": 224368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1024768, - "linear_dense_total": 4718592, - "linear_nnz": 1485456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 556080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1006160, - "linear_dense_total": 4718592, - "linear_nnz": 1562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 487760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 920208, - "linear_dense_total": 4718592, - "linear_nnz": 1407968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 403424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859936, - "linear_dense_total": 4718592, - "linear_nnz": 1263360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 380560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 679056, - "linear_dense_total": 4718592, - "linear_nnz": 1059616, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 400704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 463040, - "linear_dense_total": 4718592, - "linear_nnz": 863744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 266832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283984, - "linear_dense_total": 4718592, - "linear_nnz": 550816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 225120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 102560, - "linear_dense_total": 4718592, - "linear_nnz": 327680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11343312, - "linear_sparsity": 86.64466010199654, - "linear_total": 84934656, - "nnz": 35270510, - "total": 108893186, - "total_sparsity": 67.60999352154138 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.47398297067171, - "f1": 85.88482767255138 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.631753623962403, - "eval_elapsed_time": 32.0392144843936 - }, - "speedup": 1.5668552712310941, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 761056, - "linear_dense_total": 4718592, - "linear_nnz": 1005072, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887392, - "linear_dense_total": 4718592, - "linear_nnz": 1265136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 118128, - "linear_dense_total": 4718592, - "linear_nnz": 284976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110272, - "linear_dense_total": 4718592, - "linear_nnz": 223360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009968, - "linear_dense_total": 4718592, - "linear_nnz": 1463488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549264, - "linear_attention_total": 2359296, - "linear_dense_nnz": 989184, - "linear_dense_total": 4718592, - "linear_nnz": 1538448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 907024, - "linear_dense_total": 4718592, - "linear_nnz": 1387552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846224, - "linear_dense_total": 4718592, - "linear_nnz": 1243792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 670144, - "linear_dense_total": 4718592, - "linear_nnz": 1044112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 394160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457328, - "linear_dense_total": 4718592, - "linear_nnz": 851488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280864, - "linear_dense_total": 4718592, - "linear_nnz": 543232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 222176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101472, - "linear_dense_total": 4718592, - "linear_nnz": 323648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11174304, - "linear_sparsity": 86.84364601417825, - "linear_total": 84934656, - "nnz": 35101310, - "total": 108893186, - "total_sparsity": 67.7653751447772 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a4-l40-dl1--2021-01-24--15-48-35/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 77.4077578051088, - "f1": 85.78500582028688 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.588402084350587, - "eval_elapsed_time": 32.04897632403299 - }, - "speedup": 1.5696177764204813, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 244080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 760240, - "linear_dense_total": 4718592, - "linear_nnz": 1004320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 377328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 887488, - "linear_dense_total": 4718592, - "linear_nnz": 1264816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 166640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 117888, - "linear_dense_total": 4718592, - "linear_nnz": 284528, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 113056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 110240, - "linear_dense_total": 4718592, - "linear_nnz": 223296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 453680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1009680, - "linear_dense_total": 4718592, - "linear_nnz": 1463360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 549056, - "linear_attention_total": 2359296, - "linear_dense_nnz": 988176, - "linear_dense_total": 4718592, - "linear_nnz": 1537232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 480112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 906608, - "linear_dense_total": 4718592, - "linear_nnz": 1386720, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 397488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 846544, - "linear_dense_total": 4718592, - "linear_nnz": 1244032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 373632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 669920, - "linear_dense_total": 4718592, - "linear_nnz": 1043552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 393728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 457008, - "linear_dense_total": 4718592, - "linear_nnz": 850736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 262272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 280816, - "linear_dense_total": 4718592, - "linear_nnz": 543088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 221824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 101360, - "linear_dense_total": 4718592, - "linear_nnz": 323184, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11168864, - "linear_sparsity": 86.85005093798226, - "linear_total": 84934656, - "nnz": 35095854, - "total": 108893186, - "total_sparsity": 67.77038555929478 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6244087038789, - "f1": 88.02730364897265 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.553753234863283, - "eval_elapsed_time": 36.97127141384408 - }, - "speedup": 1.3059049623464731, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 634048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2103872, - "linear_dense_total": 4718592, - "linear_nnz": 2737920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2320064, - "linear_dense_total": 4718592, - "linear_nnz": 2982272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 398848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 698368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 298560, - "linear_dense_total": 4718592, - "linear_nnz": 561536, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2637888, - "linear_dense_total": 4718592, - "linear_nnz": 3613632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2681408, - "linear_dense_total": 4718592, - "linear_nnz": 3788800, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1248448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2625472, - "linear_dense_total": 4718592, - "linear_nnz": 3873920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1182592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558784, - "linear_dense_total": 4718592, - "linear_nnz": 3741376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1016896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2130624, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 915648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523904, - "linear_dense_total": 4718592, - "linear_nnz": 2439552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 820288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827456, - "linear_dense_total": 4718592, - "linear_nnz": 1647744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 305408, - "linear_dense_total": 4718592, - "linear_nnz": 819584, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30052224, - "linear_sparsity": 64.61724175347221, - "linear_total": 84934656, - "nnz": 53991210, - "total": 108893186, - "total_sparsity": 50.418192374314394 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l10-dl1--2021-01-24--15-46-20/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 87.861684752796 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.255816642761232, - "eval_elapsed_time": 36.84984774328768 - }, - "speedup": 1.319204091160467, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 633664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2102592, - "linear_dense_total": 4718592, - "linear_nnz": 2736256, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 662336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2319616, - "linear_dense_total": 4718592, - "linear_nnz": 2981952, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 396032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297856, - "linear_dense_total": 4718592, - "linear_nnz": 693888, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 262208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 297792, - "linear_dense_total": 4718592, - "linear_nnz": 560000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 975296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2636544, - "linear_dense_total": 4718592, - "linear_nnz": 3611840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1107968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2680128, - "linear_dense_total": 4718592, - "linear_nnz": 3788096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1247936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2623936, - "linear_dense_total": 4718592, - "linear_nnz": 3871872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1181888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2558208, - "linear_dense_total": 4718592, - "linear_nnz": 3740096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1015040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2132480, - "linear_dense_total": 4718592, - "linear_nnz": 3147520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 913792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1523328, - "linear_dense_total": 4718592, - "linear_nnz": 2437120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 818752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 827264, - "linear_dense_total": 4718592, - "linear_nnz": 1646016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 514368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304640, - "linear_dense_total": 4718592, - "linear_nnz": 819008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30033664, - "linear_sparsity": 64.6390938464506, - "linear_total": 84934656, - "nnz": 53972650, - "total": 108893186, - "total_sparsity": 50.4352365996528 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l20-dl1--2021-01-24--15-46-47/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.05392620624409, - "f1": 86.84949475139184 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 24.667898628234862, - "eval_elapsed_time": 32.10200677579269 - }, - "speedup": 1.5645594133095706, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 407936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1088064, - "linear_dense_total": 4718592, - "linear_nnz": 1496000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 569088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1378944, - "linear_dense_total": 4718592, - "linear_nnz": 1948032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 298112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 181568, - "linear_dense_total": 4718592, - "linear_nnz": 479680, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 185728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 199488, - "linear_dense_total": 4718592, - "linear_nnz": 385216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 770560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1695552, - "linear_dense_total": 4718592, - "linear_nnz": 2466112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 902848, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1680512, - "linear_dense_total": 4718592, - "linear_nnz": 2583360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 913216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1624640, - "linear_dense_total": 4718592, - "linear_nnz": 2537856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 749440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1534912, - "linear_dense_total": 4718592, - "linear_nnz": 2284352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 684480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1190976, - "linear_dense_total": 4718592, - "linear_nnz": 1875456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 672320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 815872, - "linear_dense_total": 4718592, - "linear_nnz": 1488192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 570176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 399104, - "linear_dense_total": 4718592, - "linear_nnz": 969280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 167744, - "linear_dense_total": 4718592, - "linear_nnz": 513408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 19026944, - "linear_sparsity": 77.59813850308642, - "linear_total": 84934656, - "nnz": 42955274, - "total": 108893186, - "total_sparsity": 60.55283569350244 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v11-a8-l40-dl1--2021-01-24--15-47-15/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 76.87795648060549, - "f1": 85.16652519097626 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.238733966827393, - "eval_elapsed_time": 26.43846725206822 - }, - "speedup": 2.0060775865978457, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 330432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 520000, - "linear_dense_total": 4718592, - "linear_nnz": 850432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 468224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 724864, - "linear_dense_total": 4718592, - "linear_nnz": 1193088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 206912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 137088, - "linear_dense_total": 4718592, - "linear_nnz": 344000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 127744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 128064, - "linear_dense_total": 4718592, - "linear_nnz": 255808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 511104, - "linear_attention_total": 2359296, - "linear_dense_nnz": 975680, - "linear_dense_total": 4718592, - "linear_nnz": 1486784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 688192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 908032, - "linear_dense_total": 4718592, - "linear_nnz": 1596224, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 551360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 863296, - "linear_dense_total": 4718592, - "linear_nnz": 1414656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 466304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 787328, - "linear_dense_total": 4718592, - "linear_nnz": 1253632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 451840, - "linear_attention_total": 2359296, - "linear_dense_nnz": 695488, - "linear_dense_total": 4718592, - "linear_nnz": 1147328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 497920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 475840, - "linear_dense_total": 4718592, - "linear_nnz": 973760, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 302528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 217600, - "linear_dense_total": 4718592, - "linear_nnz": 520128, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 255168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 91264, - "linear_dense_total": 4718592, - "linear_nnz": 346432, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 11382272, - "linear_sparsity": 86.59878954475309, - "linear_total": 84934656, - "nnz": 35298682, - "total": 108893186, - "total_sparsity": 67.5841222976064 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.35425478567389 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.427229469299316, - "eval_elapsed_time": 30.796412555966526 - }, - "speedup": 1.6474160145973682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 880896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2828544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 849152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2819840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 397312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1222144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3367424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1352448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3521280, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1524992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3693824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1511680, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3554560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1336320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2935296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1178112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2452992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1134080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1836032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1024000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30067968, - "linear_sparsity": 64.59870515046296, - "linear_total": 84934656, - "nnz": 53990689, - "total": 108893186, - "total_sparsity": 50.41867082482094 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v5-a16-l5--2021-01-17--14-55-26/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.11636707663197, - "f1": 88.26635621180897 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 5.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.067204750061034, - "eval_elapsed_time": 30.552880198229104 - }, - "speedup": 1.6731282972319816, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 878336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1947648, - "linear_dense_total": 4718592, - "linear_nnz": 2825984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 852736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1970688, - "linear_dense_total": 4718592, - "linear_nnz": 2823424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 583168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 228864, - "linear_dense_total": 4718592, - "linear_nnz": 812032, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 385792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 824832, - "linear_dense_total": 4718592, - "linear_nnz": 1210624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1168384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2198016, - "linear_dense_total": 4718592, - "linear_nnz": 3366400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1360384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3529216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1525248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2168832, - "linear_dense_total": 4718592, - "linear_nnz": 3694080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1519360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2042880, - "linear_dense_total": 4718592, - "linear_nnz": 3562240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1345792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1598976, - "linear_dense_total": 4718592, - "linear_nnz": 2944768, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1175296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1274880, - "linear_dense_total": 4718592, - "linear_nnz": 2450176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1126912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 701952, - "linear_dense_total": 4718592, - "linear_nnz": 1828864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 702464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 321024, - "linear_dense_total": 4718592, - "linear_nnz": 1023488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30071296, - "linear_sparsity": 64.5947868441358, - "linear_total": 84934656, - "nnz": 53994017, - "total": 108893186, - "total_sparsity": 50.41561461889819 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.29241912882233 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.73566310119629, - "eval_elapsed_time": 37.101448519621044 - }, - "speedup": 1.2979160032189903, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 466432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896491, - "linear_dense_total": 4718592, - "linear_nnz": 3362923, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 578560, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933262, - "linear_dense_total": 4718592, - "linear_nnz": 3511822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 353792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168742, - "linear_dense_total": 4718592, - "linear_nnz": 1522534, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 204032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632549, - "linear_dense_total": 4718592, - "linear_nnz": 836581, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 636672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005770, - "linear_dense_total": 4718592, - "linear_nnz": 3642442, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 857344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985991, - "linear_dense_total": 4718592, - "linear_nnz": 3843335, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 829184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2939127, - "linear_dense_total": 4718592, - "linear_nnz": 3768311, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 754432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915969, - "linear_dense_total": 4718592, - "linear_nnz": 3670401, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787598, - "linear_dense_total": 4718592, - "linear_nnz": 3555086, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 752640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497594, - "linear_dense_total": 4718592, - "linear_nnz": 3250234, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 553472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115777, - "linear_dense_total": 4718592, - "linear_nnz": 2669249, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490216, - "linear_dense_total": 4718592, - "linear_nnz": 1903656, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35536574, - "linear_sparsity": 58.160101337197375, - "linear_total": 84934656, - "nnz": 59478503, - "total": 108893186, - "total_sparsity": 45.379040521415185 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l10--2021-01-19--16-57-25/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.77578051087986, - "f1": 88.22778160568927 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.759838722229006, - "eval_elapsed_time": 37.11843426898122 - }, - "speedup": 1.2968616317313288, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 469248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2896466, - "linear_dense_total": 4718592, - "linear_nnz": 3365714, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 574976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2933134, - "linear_dense_total": 4718592, - "linear_nnz": 3508110, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 355584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1168698, - "linear_dense_total": 4718592, - "linear_nnz": 1524282, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 201472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 632483, - "linear_dense_total": 4718592, - "linear_nnz": 833955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 634624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3005666, - "linear_dense_total": 4718592, - "linear_nnz": 3640290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2985914, - "linear_dense_total": 4718592, - "linear_nnz": 3837370, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 830720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2938982, - "linear_dense_total": 4718592, - "linear_nnz": 3769702, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 756480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2915873, - "linear_dense_total": 4718592, - "linear_nnz": 3672353, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 768256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2787463, - "linear_dense_total": 4718592, - "linear_nnz": 3555719, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 753408, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2497485, - "linear_dense_total": 4718592, - "linear_nnz": 3250893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 550912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115685, - "linear_dense_total": 4718592, - "linear_nnz": 2666597, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 413184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1490132, - "linear_dense_total": 4718592, - "linear_nnz": 1903316, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 35528301, - "linear_sparsity": 58.16984176635742, - "linear_total": 84934656, - "nnz": 59470230, - "total": 108893186, - "total_sparsity": 45.38663787466004 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.92431409649953, - "f1": 87.57193515884181 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.83310959625244, - "eval_elapsed_time": 35.16166925104335 - }, - "speedup": 1.3866360448121684, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 341248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332385, - "linear_dense_total": 4718592, - "linear_nnz": 2673633, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 462592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387588, - "linear_dense_total": 4718592, - "linear_nnz": 2850180, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 227328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646176, - "linear_dense_total": 4718592, - "linear_nnz": 873504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 128000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 326046, - "linear_dense_total": 4718592, - "linear_nnz": 454046, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 412672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458384, - "linear_dense_total": 4718592, - "linear_nnz": 2871056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 692736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421331, - "linear_dense_total": 4718592, - "linear_nnz": 3114067, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 505088, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348506, - "linear_dense_total": 4718592, - "linear_nnz": 2853594, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 548864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322654, - "linear_dense_total": 4718592, - "linear_nnz": 2871518, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 469504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138640, - "linear_dense_total": 4718592, - "linear_nnz": 2608144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 552448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830183, - "linear_dense_total": 4718592, - "linear_nnz": 2382631, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 316672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440503, - "linear_dense_total": 4718592, - "linear_nnz": 1757175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859209, - "linear_dense_total": 4718592, - "linear_nnz": 1151305, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26460853, - "linear_sparsity": 68.84563469592435, - "linear_total": 84934656, - "nnz": 50398933, - "total": 108893186, - "total_sparsity": 53.71709208691902 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l20--2021-01-19--16-57-51/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.5280353923367 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.96729845428467, - "eval_elapsed_time": 35.3477450478822 - }, - "speedup": 1.3799828778048573, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 335872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2332233, - "linear_dense_total": 4718592, - "linear_nnz": 2668105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 451584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2387496, - "linear_dense_total": 4718592, - "linear_nnz": 2839080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 224768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 646159, - "linear_dense_total": 4718592, - "linear_nnz": 870927, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 124672, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325999, - "linear_dense_total": 4718592, - "linear_nnz": 450671, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 408576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2458332, - "linear_dense_total": 4718592, - "linear_nnz": 2866908, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 682496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2421186, - "linear_dense_total": 4718592, - "linear_nnz": 3103682, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 504832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2348406, - "linear_dense_total": 4718592, - "linear_nnz": 2853238, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 558336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2322448, - "linear_dense_total": 4718592, - "linear_nnz": 2880784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 475904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2138474, - "linear_dense_total": 4718592, - "linear_nnz": 2614378, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 542720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1830088, - "linear_dense_total": 4718592, - "linear_nnz": 2372808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 312576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1440402, - "linear_dense_total": 4718592, - "linear_nnz": 1752978, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 288000, - "linear_attention_total": 2359296, - "linear_dense_nnz": 859129, - "linear_dense_total": 4718592, - "linear_nnz": 1147129, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26420688, - "linear_sparsity": 68.89292399088542, - "linear_total": 84934656, - "nnz": 50358753, - "total": 108893186, - "total_sparsity": 53.75399063078199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a16-l40--2021-01-19--16-58-18/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.86471144749291, - "f1": 86.87223379259328 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.916674156188964, - "eval_elapsed_time": 34.25446852017194 - }, - "speedup": 1.4338470191904102, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 211712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1718621, - "linear_dense_total": 4718592, - "linear_nnz": 1930333, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 345600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1771278, - "linear_dense_total": 4718592, - "linear_nnz": 2116878, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 157696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 325955, - "linear_dense_total": 4718592, - "linear_nnz": 483651, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 90368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 164774, - "linear_dense_total": 4718592, - "linear_nnz": 255142, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 278016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1816807, - "linear_dense_total": 4718592, - "linear_nnz": 2094823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 493312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1772769, - "linear_dense_total": 4718592, - "linear_nnz": 2266081, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 304128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1682765, - "linear_dense_total": 4718592, - "linear_nnz": 1986893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 357376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1635131, - "linear_dense_total": 4718592, - "linear_nnz": 1992507, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 278528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1457711, - "linear_dense_total": 4718592, - "linear_nnz": 1736239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 355072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1174807, - "linear_dense_total": 4718592, - "linear_nnz": 1529879, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 183552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 867865, - "linear_dense_total": 4718592, - "linear_nnz": 1051417, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 196864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 439457, - "linear_dense_total": 4718592, - "linear_nnz": 636321, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18080164, - "linear_sparsity": 78.7128542676384, - "linear_total": 84934656, - "nnz": 42014844, - "total": 108893186, - "total_sparsity": 61.41646181607727 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l10--2021-01-19--17-00-07/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.3434247871334, - "f1": 88.502960365548 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.458772911071776, - "eval_elapsed_time": 41.833797600120306 - }, - "speedup": 1.120016464456589, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 356016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2711219, - "linear_dense_total": 4718592, - "linear_nnz": 3067235, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 506400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2753947, - "linear_dense_total": 4718592, - "linear_nnz": 3260347, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 305952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 956610, - "linear_dense_total": 4718592, - "linear_nnz": 1262562, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 532866, - "linear_dense_total": 4718592, - "linear_nnz": 705730, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 658880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2827796, - "linear_dense_total": 4718592, - "linear_nnz": 3486676, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 782176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2810214, - "linear_dense_total": 4718592, - "linear_nnz": 3592390, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 874272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2770460, - "linear_dense_total": 4718592, - "linear_nnz": 3644732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 772928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2750302, - "linear_dense_total": 4718592, - "linear_nnz": 3523230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 767984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2610331, - "linear_dense_total": 4718592, - "linear_nnz": 3378315, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2295378, - "linear_dense_total": 4718592, - "linear_nnz": 2983346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 596368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1868727, - "linear_dense_total": 4718592, - "linear_nnz": 2465095, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 404448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1245775, - "linear_dense_total": 4718592, - "linear_nnz": 1650223, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 33019881, - "linear_sparsity": 61.12319451791268, - "linear_total": 84934656, - "nnz": 56967217, - "total": 108893186, - "total_sparsity": 47.6852325727709 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l20--2021-01-19--17-00-34/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.54872280037843, - "f1": 88.09731480353894 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 31.47156787109375, - "eval_elapsed_time": 38.88521202793345 - }, - "speedup": 1.2263257160702048, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 233808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2108257, - "linear_dense_total": 4718592, - "linear_nnz": 2342065, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 370912, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2165809, - "linear_dense_total": 4718592, - "linear_nnz": 2536721, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 189856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 496337, - "linear_dense_total": 4718592, - "linear_nnz": 686193, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 106192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 273404, - "linear_dense_total": 4718592, - "linear_nnz": 379596, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 368864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2238488, - "linear_dense_total": 4718592, - "linear_nnz": 2607352, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 528528, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2212294, - "linear_dense_total": 4718592, - "linear_nnz": 2740822, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 515168, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2147598, - "linear_dense_total": 4718592, - "linear_nnz": 2662766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 456576, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2125672, - "linear_dense_total": 4718592, - "linear_nnz": 2582248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 426512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1957790, - "linear_dense_total": 4718592, - "linear_nnz": 2384302, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 424416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1621523, - "linear_dense_total": 4718592, - "linear_nnz": 2045939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 311248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1220304, - "linear_dense_total": 4718592, - "linear_nnz": 1531552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 249120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 687520, - "linear_dense_total": 4718592, - "linear_nnz": 936640, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23436196, - "linear_sparsity": 72.40679234634212, - "linear_total": 84934656, - "nnz": 47377613, - "total": 108893186, - "total_sparsity": 56.49166422589565 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.39451277199622, - "f1": 87.22039562207584 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.016168815612794, - "eval_elapsed_time": 36.33264479693025 - }, - "speedup": 1.3300995472773969, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 145232, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1501972, - "linear_dense_total": 4718592, - "linear_nnz": 1647204, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 280192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1562394, - "linear_dense_total": 4718592, - "linear_nnz": 1842586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 126288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 233713, - "linear_dense_total": 4718592, - "linear_nnz": 360001, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 73824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 141408, - "linear_dense_total": 4718592, - "linear_nnz": 215232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 234064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1607786, - "linear_dense_total": 4718592, - "linear_nnz": 1841850, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 386752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1573980, - "linear_dense_total": 4718592, - "linear_nnz": 1960732, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 281632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1495134, - "linear_dense_total": 4718592, - "linear_nnz": 1776766, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 288320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1455910, - "linear_dense_total": 4718592, - "linear_nnz": 1744230, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 240864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1278042, - "linear_dense_total": 4718592, - "linear_nnz": 1518906, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 275424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1000907, - "linear_dense_total": 4718592, - "linear_nnz": 1276331, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 170816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 708174, - "linear_dense_total": 4718592, - "linear_nnz": 878990, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 165920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 330996, - "linear_dense_total": 4718592, - "linear_nnz": 496916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15559744, - "linear_sparsity": 81.68033552758487, - "linear_total": 84934656, - "nnz": 39496838, - "total": 108893186, - "total_sparsity": 63.728825052469304 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.12961210974456, - "f1": 87.04337592394437 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.089330375671388, - "eval_elapsed_time": 36.40407280996442 - }, - "speedup": 1.3267542603060118, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1492013, - "linear_dense_total": 4718592, - "linear_nnz": 1634237, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552665, - "linear_dense_total": 4718592, - "linear_nnz": 1828361, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 124096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231707, - "linear_dense_total": 4718592, - "linear_nnz": 355803, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140436, - "linear_dense_total": 4718592, - "linear_nnz": 213044, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227984, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597983, - "linear_dense_total": 4718592, - "linear_nnz": 1825967, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563950, - "linear_dense_total": 4718592, - "linear_nnz": 1943566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 275824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485631, - "linear_dense_total": 4718592, - "linear_nnz": 1761455, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446554, - "linear_dense_total": 4718592, - "linear_nnz": 1729290, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1269099, - "linear_dense_total": 4718592, - "linear_nnz": 1504955, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993705, - "linear_dense_total": 4718592, - "linear_nnz": 1263225, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702880, - "linear_dense_total": 4718592, - "linear_nnz": 870496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328143, - "linear_dense_total": 4718592, - "linear_nnz": 489695, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15420094, - "linear_sparsity": 81.84475604398752, - "linear_total": 84934656, - "nnz": 39357122, - "total": 108893186, - "total_sparsity": 63.85713060135829 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a4-l40--2021-01-19--17-01-00/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.09176915799432, - "f1": 86.93076968810146 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.182387649536132, - "eval_elapsed_time": 36.50873678829521 - }, - "speedup": 1.3225234846739682, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 142224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1491817, - "linear_dense_total": 4718592, - "linear_nnz": 1634041, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 275888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1552458, - "linear_dense_total": 4718592, - "linear_nnz": 1828346, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 123920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 231690, - "linear_dense_total": 4718592, - "linear_nnz": 355610, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 72512, - "linear_attention_total": 2359296, - "linear_dense_nnz": 140404, - "linear_dense_total": 4718592, - "linear_nnz": 212916, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 227744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1597816, - "linear_dense_total": 4718592, - "linear_nnz": 1825560, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 379008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1563794, - "linear_dense_total": 4718592, - "linear_nnz": 1942802, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 276192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1485468, - "linear_dense_total": 4718592, - "linear_nnz": 1761660, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 282096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1446397, - "linear_dense_total": 4718592, - "linear_nnz": 1728493, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 235856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1268987, - "linear_dense_total": 4718592, - "linear_nnz": 1504843, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 269456, - "linear_attention_total": 2359296, - "linear_dense_nnz": 993538, - "linear_dense_total": 4718592, - "linear_nnz": 1262994, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 167520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702743, - "linear_dense_total": 4718592, - "linear_nnz": 870263, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 161424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 328079, - "linear_dense_total": 4718592, - "linear_nnz": 489503, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 15417031, - "linear_sparsity": 81.84836234575437, - "linear_total": 84934656, - "nnz": 39354055, - "total": 108893186, - "total_sparsity": 63.859947122862216 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.00283822138127, - "f1": 88.2671108560581 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.23066467285156, - "eval_elapsed_time": 39.6229472043924 - }, - "speedup": 1.1974432856757005, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 405824, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826999, - "linear_dense_total": 4718592, - "linear_nnz": 3232823, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868775, - "linear_dense_total": 4718592, - "linear_nnz": 3412647, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 325760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081633, - "linear_dense_total": 4718592, - "linear_nnz": 1407393, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591690, - "linear_dense_total": 4718592, - "linear_nnz": 765706, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 613248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943603, - "linear_dense_total": 4718592, - "linear_nnz": 3556851, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 791424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916774, - "linear_dense_total": 4718592, - "linear_nnz": 3708198, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 819072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876887, - "linear_dense_total": 4718592, - "linear_nnz": 3695959, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 788928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855670, - "linear_dense_total": 4718592, - "linear_nnz": 3644598, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 761600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724886, - "linear_dense_total": 4718592, - "linear_nnz": 3486486, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 686464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427996, - "linear_dense_total": 4718592, - "linear_nnz": 3114460, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 602496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013542, - "linear_dense_total": 4718592, - "linear_nnz": 2616038, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 381632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394109, - "linear_dense_total": 4718592, - "linear_nnz": 1775741, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34416900, - "linear_sparsity": 59.47837829589844, - "linear_total": 84934656, - "nnz": 58360680, - "total": 108893186, - "total_sparsity": 46.405572153982156 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l10--2021-01-19--16-58-45/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.01229895931883, - "f1": 88.16022239737082 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.31462289428711, - "eval_elapsed_time": 39.686994375661016 - }, - "speedup": 1.1943321489972945, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 404736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826896, - "linear_dense_total": 4718592, - "linear_nnz": 3231632, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 543040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2868676, - "linear_dense_total": 4718592, - "linear_nnz": 3411716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 322624, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1081551, - "linear_dense_total": 4718592, - "linear_nnz": 1404175, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 172288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 591605, - "linear_dense_total": 4718592, - "linear_nnz": 763893, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 614464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2943501, - "linear_dense_total": 4718592, - "linear_nnz": 3557965, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 790144, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2916630, - "linear_dense_total": 4718592, - "linear_nnz": 3706774, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 816832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2876748, - "linear_dense_total": 4718592, - "linear_nnz": 3693580, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 785920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2855585, - "linear_dense_total": 4718592, - "linear_nnz": 3641505, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 759424, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2724738, - "linear_dense_total": 4718592, - "linear_nnz": 3484162, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 687040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2427854, - "linear_dense_total": 4718592, - "linear_nnz": 3114894, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 603648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2013418, - "linear_dense_total": 4718592, - "linear_nnz": 2617066, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 379328, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1394031, - "linear_dense_total": 4718592, - "linear_nnz": 1773359, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34400721, - "linear_sparsity": 59.49742705733687, - "linear_total": 84934656, - "nnz": 58344499, - "total": 108893186, - "total_sparsity": 46.42043166961797 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l20--2021-01-19--16-59-13/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.15137180700094, - "f1": 87.62280270760408 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.9650231628418, - "eval_elapsed_time": 36.364678455051035 - }, - "speedup": 1.3324482010041157, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 278464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2254373, - "linear_dense_total": 4718592, - "linear_nnz": 2532837, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 411200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2313203, - "linear_dense_total": 4718592, - "linear_nnz": 2724403, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 207872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 587562, - "linear_dense_total": 4718592, - "linear_nnz": 795434, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 115648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 304918, - "linear_dense_total": 4718592, - "linear_nnz": 420566, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 388544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2383637, - "linear_dense_total": 4718592, - "linear_nnz": 2772181, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 616064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2346825, - "linear_dense_total": 4718592, - "linear_nnz": 2962889, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 475392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2281407, - "linear_dense_total": 4718592, - "linear_nnz": 2756799, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 485760, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2255524, - "linear_dense_total": 4718592, - "linear_nnz": 2741284, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 436416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2089830, - "linear_dense_total": 4718592, - "linear_nnz": 2526246, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 473664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1759353, - "linear_dense_total": 4718592, - "linear_nnz": 2233017, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 292096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1360596, - "linear_dense_total": 4718592, - "linear_nnz": 1652692, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 260864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 795671, - "linear_dense_total": 4718592, - "linear_nnz": 1056535, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 25174883, - "linear_sparsity": 70.35970452391072, - "linear_total": 84934656, - "nnz": 49113499, - "total": 108893186, - "total_sparsity": 54.89754611459343 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.59981078524125, - "f1": 86.70965342219107 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.427432876586913, - "eval_elapsed_time": 34.77788851317018 - }, - "speedup": 1.407145655192423, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 185152, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1640271, - "linear_dense_total": 4718592, - "linear_nnz": 1825423, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 309376, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1701856, - "linear_dense_total": 4718592, - "linear_nnz": 2011232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 283313, - "linear_dense_total": 4718592, - "linear_nnz": 423537, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 154892, - "linear_dense_total": 4718592, - "linear_nnz": 237196, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1747153, - "linear_dense_total": 4718592, - "linear_nnz": 2013521, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 452288, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1699193, - "linear_dense_total": 4718592, - "linear_nnz": 2151481, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 315584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1622345, - "linear_dense_total": 4718592, - "linear_nnz": 1937929, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 324160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1582184, - "linear_dense_total": 4718592, - "linear_nnz": 1906344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 264448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1396319, - "linear_dense_total": 4718592, - "linear_nnz": 1660767, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 312704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1106218, - "linear_dense_total": 4718592, - "linear_nnz": 1418922, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 176128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 797060, - "linear_dense_total": 4718592, - "linear_nnz": 973188, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 178368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 396240, - "linear_dense_total": 4718592, - "linear_nnz": 574608, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17134148, - "linear_sparsity": 79.82667051715615, - "linear_total": 84934656, - "nnz": 41069735, - "total": 108893186, - "total_sparsity": 62.28438480989986 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.78902554399244, - "f1": 86.80367154149816 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.48367044067383, - "eval_elapsed_time": 34.82450146274641 - }, - "speedup": 1.404266329298368, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 181120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630256, - "linear_dense_total": 4718592, - "linear_nnz": 1811376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 307392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692246, - "linear_dense_total": 4718592, - "linear_nnz": 1999638, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 136448, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281138, - "linear_dense_total": 4718592, - "linear_nnz": 417586, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 82304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153935, - "linear_dense_total": 4718592, - "linear_nnz": 236239, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 266880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737446, - "linear_dense_total": 4718592, - "linear_nnz": 2004326, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689801, - "linear_dense_total": 4718592, - "linear_nnz": 2132105, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 309632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1613097, - "linear_dense_total": 4718592, - "linear_nnz": 1922729, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 313664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572856, - "linear_dense_total": 4718592, - "linear_nnz": 1886520, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 259072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387577, - "linear_dense_total": 4718592, - "linear_nnz": 1646649, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 306112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098827, - "linear_dense_total": 4718592, - "linear_nnz": 1404939, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 173184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791446, - "linear_dense_total": 4718592, - "linear_nnz": 964630, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 172928, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393190, - "linear_dense_total": 4718592, - "linear_nnz": 566118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16992855, - "linear_sparsity": 79.99302546183267, - "linear_total": 84934656, - "nnz": 40928357, - "total": 108893186, - "total_sparsity": 62.414216625088 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v8-a8-l40--2021-01-19--16-59-40/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.6092715231788, - "f1": 86.70267601348202 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 27.478721130371095, - "eval_elapsed_time": 34.80613293591887 - }, - "speedup": 1.4045192577290035, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 180736, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1630123, - "linear_dense_total": 4718592, - "linear_nnz": 1810859, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 305920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1692103, - "linear_dense_total": 4718592, - "linear_nnz": 1998023, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 135616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 281100, - "linear_dense_total": 4718592, - "linear_nnz": 416716, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 81536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 153912, - "linear_dense_total": 4718592, - "linear_nnz": 235448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 263936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1737263, - "linear_dense_total": 4718592, - "linear_nnz": 2001199, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 442496, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1689622, - "linear_dense_total": 4718592, - "linear_nnz": 2132118, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 306304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1612927, - "linear_dense_total": 4718592, - "linear_nnz": 1919231, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 312128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1572769, - "linear_dense_total": 4718592, - "linear_nnz": 1884897, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1387454, - "linear_dense_total": 4718592, - "linear_nnz": 1645758, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 305856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1098709, - "linear_dense_total": 4718592, - "linear_nnz": 1404565, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 172480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 791310, - "linear_dense_total": 4718592, - "linear_nnz": 963790, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 170944, - "linear_attention_total": 2359296, - "linear_dense_nnz": 393127, - "linear_dense_total": 4718592, - "linear_nnz": 564071, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16976675, - "linear_sparsity": 80.01207540064682, - "linear_total": 84934656, - "nnz": 40912185, - "total": 108893186, - "total_sparsity": 62.42906787574385 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l10--2021-01-20--18-58-11/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.93661305581836, - "f1": 88.34112193061533 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 30.13610975646973, - "eval_elapsed_time": 37.54532916797325 - }, - "speedup": 1.2806693802635063, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 517888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4068608, - "linear_dense_total": 4718592, - "linear_nnz": 4586496, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 641536, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4202752, - "linear_dense_total": 4718592, - "linear_nnz": 4844288, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 415488, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1090304, - "linear_dense_total": 4718592, - "linear_nnz": 1505792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 254720, - "linear_attention_total": 2359296, - "linear_dense_nnz": 947200, - "linear_dense_total": 4718592, - "linear_nnz": 1201920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 841472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4313856, - "linear_dense_total": 4718592, - "linear_nnz": 5155328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1072896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4336128, - "linear_dense_total": 4718592, - "linear_nnz": 5409024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1068800, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4317184, - "linear_dense_total": 4718592, - "linear_nnz": 5385984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 961792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4311040, - "linear_dense_total": 4718592, - "linear_nnz": 5272832, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 986880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4141568, - "linear_dense_total": 4718592, - "linear_nnz": 5128448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 905472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3820032, - "linear_dense_total": 4718592, - "linear_nnz": 4725504, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 756224, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3085568, - "linear_dense_total": 4718592, - "linear_nnz": 3841792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 463360, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1416448, - "linear_dense_total": 4718592, - "linear_nnz": 1879808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48937216, - "linear_sparsity": 42.38251109182099, - "linear_total": 84934656, - "nnz": 72878482, - "total": 108893186, - "total_sparsity": 33.07342297799975 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.51569063636161 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.3544778213501, - "eval_elapsed_time": 33.69302155217156 - }, - "speedup": 1.4644339860190774, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 418816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3448576, - "linear_dense_total": 4718592, - "linear_nnz": 3867392, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 553728, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696384, - "linear_dense_total": 4718592, - "linear_nnz": 4250112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 291584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 654592, - "linear_dense_total": 4718592, - "linear_nnz": 946176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 168960, - "linear_attention_total": 2359296, - "linear_dense_nnz": 615424, - "linear_dense_total": 4718592, - "linear_nnz": 784384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 562432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3956992, - "linear_dense_total": 4718592, - "linear_nnz": 4519424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 827392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3982336, - "linear_dense_total": 4718592, - "linear_nnz": 4809728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 790016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3912960, - "linear_dense_total": 4718592, - "linear_nnz": 4702976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 701696, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3899648, - "linear_dense_total": 4718592, - "linear_nnz": 4601344, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 667392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3576064, - "linear_dense_total": 4718592, - "linear_nnz": 4243456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 700416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2968832, - "linear_dense_total": 4718592, - "linear_nnz": 3669248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 437504, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1966592, - "linear_dense_total": 4718592, - "linear_nnz": 2404096, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 361472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 693504, - "linear_dense_total": 4718592, - "linear_nnz": 1054976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39853312, - "linear_sparsity": 53.077678915895056, - "linear_total": 84934656, - "nnz": 63788226, - "total": 108893186, - "total_sparsity": 41.42128782970864 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l20--2021-01-20--18-58-39/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.55534531693472, - "f1": 87.439750439335 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 26.32847610473633, - "eval_elapsed_time": 33.60846929671243 - }, - "speedup": 1.4658802450943298, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 416256, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3444992, - "linear_dense_total": 4718592, - "linear_nnz": 3861248, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 541952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3708416, - "linear_dense_total": 4718592, - "linear_nnz": 4250368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 285184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 647936, - "linear_dense_total": 4718592, - "linear_nnz": 933120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 174080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 611328, - "linear_dense_total": 4718592, - "linear_nnz": 785408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 555520, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3977216, - "linear_dense_total": 4718592, - "linear_nnz": 4532736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 802816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4019968, - "linear_dense_total": 4718592, - "linear_nnz": 4822784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 774400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3939840, - "linear_dense_total": 4718592, - "linear_nnz": 4714240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 686592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3897600, - "linear_dense_total": 4718592, - "linear_nnz": 4584192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 656384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3568640, - "linear_dense_total": 4718592, - "linear_nnz": 4225024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 676864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2957312, - "linear_dense_total": 4718592, - "linear_nnz": 3634176, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 432640, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1931264, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 345344, - "linear_attention_total": 2359296, - "linear_dense_nnz": 685056, - "linear_dense_total": 4718592, - "linear_nnz": 1030400, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39737600, - "linear_sparsity": 53.213915412808646, - "linear_total": 84934656, - "nnz": 63672482, - "total": 108893186, - "total_sparsity": 41.52757914531035 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a16-l40--2021-01-20--18-59-08/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.11731315042573, - "f1": 86.14927876930865 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 16, - "attention_block_rows": 16, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 16, - "dense_block_rows": 16, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 23.35162329864502, - "eval_elapsed_time": 30.60480569722131 - }, - "speedup": 1.6527498971607057, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 331008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354688, - "linear_dense_total": 4718592, - "linear_nnz": 2685696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 432384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2826240, - "linear_dense_total": 4718592, - "linear_nnz": 3258624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 203008, - "linear_attention_total": 2359296, - "linear_dense_nnz": 415744, - "linear_dense_total": 4718592, - "linear_nnz": 618752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 112128, - "linear_attention_total": 2359296, - "linear_dense_nnz": 423168, - "linear_dense_total": 4718592, - "linear_nnz": 535296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 423936, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3302144, - "linear_dense_total": 4718592, - "linear_nnz": 3726080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 669440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3248128, - "linear_dense_total": 4718592, - "linear_nnz": 3917568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 453632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3193600, - "linear_dense_total": 4718592, - "linear_nnz": 3647232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 473856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3119616, - "linear_dense_total": 4718592, - "linear_nnz": 3593472, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 445952, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2493696, - "linear_dense_total": 4718592, - "linear_nnz": 2939648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 490752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1891072, - "linear_dense_total": 4718592, - "linear_nnz": 2381824, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 275712, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1108736, - "linear_dense_total": 4718592, - "linear_nnz": 1384448, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 258304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 348928, - "linear_dense_total": 4718592, - "linear_nnz": 607232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29295872, - "linear_sparsity": 65.5077522183642, - "linear_total": 84934656, - "nnz": 53223538, - "total": 108893186, - "total_sparsity": 51.12316945157615 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l10--2021-01-20--19-01-04/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.47587511825922, - "f1": 88.58172107792693 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 34.2993692779541, - "eval_elapsed_time": 41.87211530236527 - }, - "speedup": 1.1252216532791355, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 428592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3980096, - "linear_dense_total": 4718592, - "linear_nnz": 4408688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 545744, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4015584, - "linear_dense_total": 4718592, - "linear_nnz": 4561328, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 329968, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2092032, - "linear_dense_total": 4718592, - "linear_nnz": 2422000, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 190816, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1335104, - "linear_dense_total": 4718592, - "linear_nnz": 1525920, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 729664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4061440, - "linear_dense_total": 4718592, - "linear_nnz": 4791104, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 851472, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4062640, - "linear_dense_total": 4718592, - "linear_nnz": 4914112, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 960992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4047744, - "linear_dense_total": 4718592, - "linear_nnz": 5008736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 902768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4006096, - "linear_dense_total": 4718592, - "linear_nnz": 4908864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 861120, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3920672, - "linear_dense_total": 4718592, - "linear_nnz": 4781792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 759664, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3732848, - "linear_dense_total": 4718592, - "linear_nnz": 4492512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 670096, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3391392, - "linear_dense_total": 4718592, - "linear_nnz": 4061488, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 444064, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2661776, - "linear_dense_total": 4718592, - "linear_nnz": 3105840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48982384, - "linear_sparsity": 42.329331386236504, - "linear_total": 84934656, - "nnz": 72930262, - "total": 108893186, - "total_sparsity": 33.025871793300276 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.52980132450331, - "f1": 88.02284574429551 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.3459995803833, - "eval_elapsed_time": 40.03914254019037 - }, - "speedup": 1.1931736074335828, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 261808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3592944, - "linear_dense_total": 4718592, - "linear_nnz": 3854752, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 407856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3665376, - "linear_dense_total": 4718592, - "linear_nnz": 4073232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 212544, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1095184, - "linear_dense_total": 4718592, - "linear_nnz": 1307728, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 122704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 702496, - "linear_dense_total": 4718592, - "linear_nnz": 825200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 470352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3747664, - "linear_dense_total": 4718592, - "linear_nnz": 4218016, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 586320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3731872, - "linear_dense_total": 4718592, - "linear_nnz": 4318192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 598112, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3696160, - "linear_dense_total": 4718592, - "linear_nnz": 4294272, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 540976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3650592, - "linear_dense_total": 4718592, - "linear_nnz": 4191568, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 518320, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3500640, - "linear_dense_total": 4718592, - "linear_nnz": 4018960, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 494608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3197872, - "linear_dense_total": 4718592, - "linear_nnz": 3692480, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 381872, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2682864, - "linear_dense_total": 4718592, - "linear_nnz": 3064736, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 281888, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1600800, - "linear_dense_total": 4718592, - "linear_nnz": 1882688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39741824, - "linear_sparsity": 53.208942177854944, - "linear_total": 84934656, - "nnz": 63685078, - "total": 108893186, - "total_sparsity": 41.51601184669167 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l20--2021-01-20--19-01-34/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.09460737937559, - "f1": 87.80889686617203 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.19205239105224, - "eval_elapsed_time": 39.82947535999119 - }, - "speedup": 1.1988795413397866, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 258016, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3584960, - "linear_dense_total": 4718592, - "linear_nnz": 3842976, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 404784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3659360, - "linear_dense_total": 4718592, - "linear_nnz": 4064144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 209136, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1083920, - "linear_dense_total": 4718592, - "linear_nnz": 1293056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 120976, - "linear_attention_total": 2359296, - "linear_dense_nnz": 697408, - "linear_dense_total": 4718592, - "linear_nnz": 818384, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 460752, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3741328, - "linear_dense_total": 4718592, - "linear_nnz": 4202080, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 577184, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3724032, - "linear_dense_total": 4718592, - "linear_nnz": 4301216, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 587792, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3689648, - "linear_dense_total": 4718592, - "linear_nnz": 4277440, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 530480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3641984, - "linear_dense_total": 4718592, - "linear_nnz": 4172464, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 508336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3491408, - "linear_dense_total": 4718592, - "linear_nnz": 3999744, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 486304, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3187056, - "linear_dense_total": 4718592, - "linear_nnz": 3673360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 374032, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2669344, - "linear_dense_total": 4718592, - "linear_nnz": 3043376, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 276992, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1586976, - "linear_dense_total": 4718592, - "linear_nnz": 1863968, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 39552208, - "linear_sparsity": 53.432191448447156, - "linear_total": 84934656, - "nnz": 63495382, - "total": 108893186, - "total_sparsity": 41.69021558428826 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a4-l40--2021-01-20--19-02-03/checkpoint-105000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.64049195837275, - "f1": 87.31499809166372 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 4, - "attention_block_rows": 4, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 4, - "dense_block_rows": 4, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.556625274658202, - "eval_elapsed_time": 36.13367621740326 - }, - "speedup": 1.3515039902008532, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 172416, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2978704, - "linear_dense_total": 4718592, - "linear_nnz": 3151120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 308192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3103168, - "linear_dense_total": 4718592, - "linear_nnz": 3411360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 140384, - "linear_attention_total": 2359296, - "linear_dense_nnz": 526736, - "linear_dense_total": 4718592, - "linear_nnz": 667120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 84608, - "linear_attention_total": 2359296, - "linear_dense_nnz": 377248, - "linear_dense_total": 4718592, - "linear_nnz": 461856, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 285568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3205568, - "linear_dense_total": 4718592, - "linear_nnz": 3491136, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 437904, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3165264, - "linear_dense_total": 4718592, - "linear_nnz": 3603168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 321040, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3089840, - "linear_dense_total": 4718592, - "linear_nnz": 3410880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 332784, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3023632, - "linear_dense_total": 4718592, - "linear_nnz": 3356416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 288464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2784432, - "linear_dense_total": 4718592, - "linear_nnz": 3072896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 328464, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2354768, - "linear_dense_total": 4718592, - "linear_nnz": 2683232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 204832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1752368, - "linear_dense_total": 4718592, - "linear_nnz": 1957200, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 189616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 715936, - "linear_dense_total": 4718592, - "linear_nnz": 905552, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30171936, - "linear_sparsity": 64.47629575376158, - "linear_total": 84934656, - "nnz": 54109530, - "total": 108893186, - "total_sparsity": 50.30953543778212 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110000": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.10690633869442, - "f1": 88.3744311515211 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.22343955230713, - "eval_elapsed_time": 39.62965265568346 - }, - "speedup": 1.1977117757004876, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004672, - "linear_dense_total": 4718592, - "linear_nnz": 4451008, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597248, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4077632, - "linear_dense_total": 4718592, - "linear_nnz": 4674880, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362688, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517184, - "linear_dense_total": 4718592, - "linear_nnz": 1879872, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 218432, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1064384, - "linear_dense_total": 4718592, - "linear_nnz": 1282816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 799296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155072, - "linear_dense_total": 4718592, - "linear_nnz": 4954368, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 950208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165440, - "linear_dense_total": 4718592, - "linear_nnz": 5115648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1022400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152448, - "linear_dense_total": 4718592, - "linear_nnz": 5174848, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 914368, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4106624, - "linear_dense_total": 4718592, - "linear_nnz": 5020992, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 918208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3961088, - "linear_dense_total": 4718592, - "linear_nnz": 4879296, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 832704, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3684992, - "linear_dense_total": 4718592, - "linear_nnz": 4517696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 715648, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207040, - "linear_dense_total": 4718592, - "linear_nnz": 3922688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 467072, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2117440, - "linear_dense_total": 4718592, - "linear_nnz": 2584512, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48458624, - "linear_sparsity": 42.94599368248457, - "linear_total": 84934656, - "nnz": 72403618, - "total": 108893186, - "total_sparsity": 33.50950536060172 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l10--2021-01-20--18-59-37/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.82308420056765, - "f1": 88.21300800880684 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10.0 - }, - "speed": { - "cuda_eval_elapsed_time": 32.25489320373535, - "eval_elapsed_time": 39.64649308426306 - }, - "speedup": 1.1965438162077555, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 446080, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4004864, - "linear_dense_total": 4718592, - "linear_nnz": 4450944, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 597312, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4076928, - "linear_dense_total": 4718592, - "linear_nnz": 4674240, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 362048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1517376, - "linear_dense_total": 4718592, - "linear_nnz": 1879424, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 217216, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1063808, - "linear_dense_total": 4718592, - "linear_nnz": 1281024, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 800192, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4155456, - "linear_dense_total": 4718592, - "linear_nnz": 4955648, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 948864, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4165760, - "linear_dense_total": 4718592, - "linear_nnz": 5114624, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1019200, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4152640, - "linear_dense_total": 4718592, - "linear_nnz": 5171840, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 915392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 4108416, - "linear_dense_total": 4718592, - "linear_nnz": 5023808, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 916160, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3960384, - "linear_dense_total": 4718592, - "linear_nnz": 4876544, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 834176, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3685056, - "linear_dense_total": 4718592, - "linear_nnz": 4519232, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 713856, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3207936, - "linear_dense_total": 4718592, - "linear_nnz": 3921792, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 465600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2115456, - "linear_dense_total": 4718592, - "linear_nnz": 2581056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 48450176, - "linear_sparsity": 42.95594015239198, - "linear_total": 84934656, - "nnz": 72395170, - "total": 108893186, - "total_sparsity": 33.51726342179023 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l20--2021-01-20--19-00-06/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.05676442762535, - "f1": 87.66615713942541 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 20.0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.86345721435547, - "eval_elapsed_time": 36.22357800696045 - }, - "speedup": 1.3371368758339826, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 326336, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3501120, - "linear_dense_total": 4718592, - "linear_nnz": 3827456, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 487552, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3653568, - "linear_dense_total": 4718592, - "linear_nnz": 4141120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 238208, - "linear_attention_total": 2359296, - "linear_dense_nnz": 756608, - "linear_dense_total": 4718592, - "linear_nnz": 994816, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 141568, - "linear_attention_total": 2359296, - "linear_dense_nnz": 622848, - "linear_dense_total": 4718592, - "linear_nnz": 764416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 487616, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3801472, - "linear_dense_total": 4718592, - "linear_nnz": 4289088, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 712832, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3800064, - "linear_dense_total": 4718592, - "linear_nnz": 4512896, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 646272, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3743872, - "linear_dense_total": 4718592, - "linear_nnz": 4390144, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 625600, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3691328, - "linear_dense_total": 4718592, - "linear_nnz": 4316928, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 575808, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3461056, - "linear_dense_total": 4718592, - "linear_nnz": 4036864, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 579392, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3012928, - "linear_dense_total": 4718592, - "linear_nnz": 3592320, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 405632, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2347776, - "linear_dense_total": 4718592, - "linear_nnz": 2753408, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 317440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1001344, - "linear_dense_total": 4718592, - "linear_nnz": 1318784, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 38938240, - "linear_sparsity": 54.1550624517747, - "linear_total": 84934656, - "nnz": 62877338, - "total": 108893186, - "total_sparsity": 42.257784614732465 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v9-a8-l40--2021-01-20--19-00-35/checkpoint-110660": { - "config": { - "_name_or_path": "bert-base-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.76064333017976, - "f1": 86.75922108224064 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 8, - "attention_block_rows": 8, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 8, - "dense_block_rows": 8, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40.0 - }, - "speed": { - "cuda_eval_elapsed_time": 25.933858947753908, - "eval_elapsed_time": 33.4375456799753 - }, - "speedup": 1.4881855061802785, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 241280, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2752704, - "linear_dense_total": 4718592, - "linear_nnz": 2993984, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 379584, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2951104, - "linear_dense_total": 4718592, - "linear_nnz": 3330688, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 172352, - "linear_attention_total": 2359296, - "linear_dense_nnz": 419008, - "linear_dense_total": 4718592, - "linear_nnz": 591360, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 104768, - "linear_attention_total": 2359296, - "linear_dense_nnz": 388288, - "linear_dense_total": 4718592, - "linear_nnz": 493056, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 322880, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3194240, - "linear_dense_total": 4718592, - "linear_nnz": 3517120, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 565440, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3155136, - "linear_dense_total": 4718592, - "linear_nnz": 3720576, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 390400, - "linear_attention_total": 2359296, - "linear_dense_nnz": 3064768, - "linear_dense_total": 4718592, - "linear_nnz": 3455168, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 406592, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2993600, - "linear_dense_total": 4718592, - "linear_nnz": 3400192, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 356480, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2631680, - "linear_dense_total": 4718592, - "linear_nnz": 2988160, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 409920, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2067776, - "linear_dense_total": 4718592, - "linear_nnz": 2477696, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 242048, - "linear_attention_total": 2359296, - "linear_dense_nnz": 1370368, - "linear_dense_total": 4718592, - "linear_nnz": 1612416, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 224896, - "linear_attention_total": 2359296, - "linear_dense_nnz": 472768, - "linear_dense_total": 4718592, - "linear_nnz": 697664, - "linear_total": 7077888, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 29278080, - "linear_sparsity": 65.52870008680556, - "linear_total": 84934656, - "nnz": 53211146, - "total": 108893186, - "total_sparsity": 51.13454941064908 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/opt/ml/output", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20.0, - "optimize_model_before_eval": "disabled", - "output_dir": "/opt/ml/model", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/opt/ml/model", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2 - ], - "5": [ - 1, - 2, - 5, - 6, - 7 - ], - "6": [ - 0, - 2, - 3, - 7 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.69063386944181, - "f1": 88.06386432532665 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.390718185424806, - "eval_elapsed_time": 24.534384376835078 - }, - "speedup": 2.2192523962418718, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 634368, - "linear_dense_total": 4718592, - "linear_nnz": 1420800, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 916992, - "linear_dense_total": 4718592, - "linear_nnz": 1703424, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 112128, - "linear_dense_total": 4718592, - "linear_nnz": 1291776, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 313344, - "linear_dense_total": 4718592, - "linear_nnz": 903168, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1016832, - "linear_dense_total": 4718592, - "linear_nnz": 2786304, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1076736, - "linear_dense_total": 4718592, - "linear_nnz": 2649600, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1158144, - "linear_dense_total": 4718592, - "linear_nnz": 3124224, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1073664, - "linear_dense_total": 4718592, - "linear_nnz": 2449920, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 815616, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 629760, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1910784, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 139776, - "linear_dense_total": 4718592, - "linear_nnz": 1122816, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 23757312, - "linear_sparsity": 67.07467643051771, - "linear_total": 72155136, - "nnz": 47671853, - "total": 96101186, - "total_sparsity": 50.394105437991165 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl0-5--2021-01-21--00-54-13", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 1, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2 - ], - "5": [ - 1, - 2, - 6, - 7 - ], - "6": [ - 0, - 2, - 3, - 7 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.23651844843897, - "f1": 87.68464122182475 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 17.154361824035647, - "eval_elapsed_time": 24.304617804009467 - }, - "speedup": 2.249829716853412, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 297984, - "linear_dense_total": 4718592, - "linear_nnz": 1477632, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 483840, - "linear_dense_total": 4718592, - "linear_nnz": 1466880, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1253376, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 159744, - "linear_dense_total": 4718592, - "linear_nnz": 749568, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 619008, - "linear_dense_total": 4718592, - "linear_nnz": 2388480, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 657408, - "linear_dense_total": 4718592, - "linear_nnz": 2230272, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 705024, - "linear_dense_total": 4718592, - "linear_nnz": 2671104, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 668160, - "linear_dense_total": 4718592, - "linear_nnz": 2241024, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 516096, - "linear_dense_total": 4718592, - "linear_nnz": 2088960, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 384000, - "linear_dense_total": 4718592, - "linear_nnz": 1760256, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 204288, - "linear_dense_total": 4718592, - "linear_nnz": 1973760, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 92160, - "linear_dense_total": 4718592, - "linear_nnz": 1271808, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 21573120, - "linear_sparsity": 70.58269101876675, - "linear_total": 73334784, - "nnz": 45486623, - "total": 97281986, - "total_sparsity": 53.24250164876363 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l10-dl1--2021-01-21--00-53-40", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 79.4228949858089, - "f1": 87.22907143184382 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44/checkpoint-110000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.848762104034424, - "eval_elapsed_time": 22.048566517885774 - }, - "speedup": 2.5991656903766382, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 411648, - "linear_dense_total": 4718592, - "linear_nnz": 1198080, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 592896, - "linear_dense_total": 4718592, - "linear_nnz": 1379328, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 87552, - "linear_dense_total": 4718592, - "linear_nnz": 1070592, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 199680, - "linear_dense_total": 4718592, - "linear_nnz": 789504, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 698880, - "linear_dense_total": 4718592, - "linear_nnz": 1878528, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 714240, - "linear_dense_total": 4718592, - "linear_nnz": 2090496, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 834048, - "linear_dense_total": 4718592, - "linear_nnz": 2210304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 743424, - "linear_dense_total": 4718592, - "linear_nnz": 1726464, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 568320, - "linear_dense_total": 4718592, - "linear_nnz": 1747968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 450048, - "linear_dense_total": 4718592, - "linear_nnz": 1826304, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 264192, - "linear_dense_total": 4718592, - "linear_nnz": 1443840, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 101376, - "linear_dense_total": 4718592, - "linear_nnz": 1084416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18445824, - "linear_sparsity": 73.42200779036827, - "linear_total": 69402624, - "nnz": 42356011, - "total": 93345986, - "total_sparsity": 54.62471091151151 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl0-5--2021-01-21--00-55-44", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 4, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 2, - 3, - 4, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.82686849574267, - "f1": 86.75497848244157 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 14.354346725463868, - "eval_elapsed_time": 21.489493974950165 - }, - "speedup": 2.68869031405704, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 181248, - "linear_dense_total": 4718592, - "linear_nnz": 967680, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 299520, - "linear_dense_total": 4718592, - "linear_nnz": 1085952, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 58368, - "linear_dense_total": 4718592, - "linear_nnz": 1041408, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 96768, - "linear_dense_total": 4718592, - "linear_nnz": 686592, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 407040, - "linear_dense_total": 4718592, - "linear_nnz": 1586688, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 440832, - "linear_dense_total": 4718592, - "linear_nnz": 2013696, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 496128, - "linear_dense_total": 4718592, - "linear_nnz": 1872384, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 433152, - "linear_dense_total": 4718592, - "linear_nnz": 1416192, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 337920, - "linear_dense_total": 4718592, - "linear_nnz": 1517568, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 268800, - "linear_dense_total": 4718592, - "linear_nnz": 1645056, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 158208, - "linear_dense_total": 4718592, - "linear_nnz": 1534464, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 73728, - "linear_dense_total": 4718592, - "linear_nnz": 1056768, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 16424448, - "linear_sparsity": 76.46786971830986, - "linear_total": 69795840, - "nnz": 40333447, - "total": 93739586, - "total_sparsity": 56.972876965767696 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l20-dl1--2021-01-21--00-55-15", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 4, - 5, - 6, - 7 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 6, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4, - 7 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.69347209082308, - "f1": 88.72194531479171 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45/checkpoint-95000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 20.951393741607667, - "eval_elapsed_time": 28.213609586004168 - }, - "speedup": 1.8420919143305463, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1125888, - "linear_dense_total": 4718592, - "linear_nnz": 2502144, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1285632, - "linear_dense_total": 4718592, - "linear_nnz": 2268672, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 168960, - "linear_dense_total": 4718592, - "linear_nnz": 1545216, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 485376, - "linear_dense_total": 4718592, - "linear_nnz": 1468416, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1523712, - "linear_dense_total": 4718592, - "linear_nnz": 3293184, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1555968, - "linear_dense_total": 4718592, - "linear_nnz": 3325440, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1617408, - "linear_dense_total": 4718592, - "linear_nnz": 3780096, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 1514496, - "linear_dense_total": 4718592, - "linear_nnz": 3480576, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1135104, - "linear_dense_total": 4718592, - "linear_nnz": 2904576, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 2420736, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 474624, - "linear_dense_total": 4718592, - "linear_nnz": 2440704, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 208896, - "linear_dense_total": 4718592, - "linear_nnz": 1388544, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 30818304, - "linear_sparsity": 59.1796875, - "linear_total": 75497472, - "nnz": 54738530, - "total": 99446786, - "total_sparsity": 44.95696422004025 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl0-5--2021-01-21--00-52-45", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.86092715231788, - "f1": 88.26868699204444 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.458871116638186, - "eval_elapsed_time": 26.62503844080493 - }, - "speedup": 1.98338294004996, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8 - ], - "10": [ - 1, - 4, - 5, - 7 - ], - "11": [ - 0, - 2, - 5, - 6, - 7, - 8, - 11 - ], - "2": [ - 8, - 4 - ], - "3": [ - 2, - 4, - 6 - ], - "4": [ - 2 - ], - "5": [ - 1, - 2 - ], - "6": [ - 2, - 3, - 7 - ], - "7": [ - 11, - 3, - 6, - 7 - ], - "8": [ - 0, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.87038789025544, - "f1": 88.24613086360249 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_aws/aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16/checkpoint-110660", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 19.453059474945068, - "eval_elapsed_time": 26.577815205790102 - }, - "speedup": 1.9839754797994356, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 526848, - "linear_dense_total": 4718592, - "linear_nnz": 1903104, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 752640, - "linear_dense_total": 4718592, - "linear_nnz": 1735680, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 98304, - "linear_dense_total": 4718592, - "linear_nnz": 1671168, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 262656, - "linear_dense_total": 4718592, - "linear_nnz": 1245696, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 873984, - "linear_dense_total": 4718592, - "linear_nnz": 2840064, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2721792, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 2162688, - "linear_attention_total": 2162688, - "linear_dense_nnz": 1046016, - "linear_dense_total": 4718592, - "linear_nnz": 3208704, - "linear_total": 6881280, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 986112, - "linear_dense_total": 4718592, - "linear_nnz": 2952192, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 740352, - "linear_dense_total": 4718592, - "linear_nnz": 2509824, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 559104, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1966080, - "linear_attention_total": 1966080, - "linear_dense_nnz": 293376, - "linear_dense_total": 4718592, - "linear_nnz": 2259456, - "linear_total": 6684672, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1293312, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26472960, - "linear_sparsity": 65.11698510362694, - "linear_total": 75890688, - "nnz": 50390485, - "total": 99840386, - "total_sparsity": 49.52895614806617 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_aws_nn-pruning-v10-a32-l5-dl1--2021-01-21--00-52-16", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-15000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.80416272469253, - "f1": 88.20260662536118 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.439563426971436, - "eval_elapsed_time": 25.7331585730426 - }, - "speedup": 2.0930209740713988, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.6717123935667, - "f1": 88.128983727943 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.875869693756105, - "eval_elapsed_time": 26.023085076361895 - }, - "speedup": 2.044641843344449, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 8, - 11, - 4, - 7 - ], - "3": [ - 2, - 4, - 6, - 7 - ], - "4": [ - 1, - 2, - 11 - ], - "5": [ - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 8, - 3, - 4 - ], - "9": [ - 1, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.68117313150425, - "f1": 88.11014400914335 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 18.42703369522095, - "eval_elapsed_time": 25.61402732366696 - }, - "speedup": 2.094444154371984, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1339392, - "linear_dense_total": 4718592, - "linear_nnz": 2125824, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1571328, - "linear_dense_total": 4718592, - "linear_nnz": 2357760, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 187392, - "linear_dense_total": 4718592, - "linear_nnz": 1367040, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 574464, - "linear_dense_total": 4718592, - "linear_nnz": 1164288, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1744896, - "linear_dense_total": 4718592, - "linear_nnz": 3317760, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 1761792, - "linear_dense_total": 4718592, - "linear_nnz": 3334656, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1769472, - "linear_attention_total": 1769472, - "linear_dense_nnz": 1726464, - "linear_dense_total": 4718592, - "linear_nnz": 3495936, - "linear_total": 6488064, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1629696, - "linear_dense_total": 4718592, - "linear_nnz": 2809344, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1270272, - "linear_dense_total": 4718592, - "linear_nnz": 2646528, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 987648, - "linear_dense_total": 4718592, - "linear_nnz": 2363904, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 546816, - "linear_dense_total": 4718592, - "linear_nnz": 2119680, - "linear_total": 6291456, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 248832, - "linear_dense_total": 4718592, - "linear_nnz": 1428480, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 28531200, - "linear_sparsity": 60.1326407967033, - "linear_total": 71565312, - "nnz": 52448657, - "total": 95510786, - "total_sparsity": 45.08614241746477 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl10_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-20000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.01892147587512, - "f1": 87.70568682399205 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.845825397491456, - "eval_elapsed_time": 23.001069764140993 - }, - "speedup": 2.4356189745395627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 4, - 7, - 8, - 11 - ], - "3": [ - 2, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 6, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.02838221381268, - "f1": 87.70940223967354 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-90000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 15.838374267578125, - "eval_elapsed_time": 22.999519595876336 - }, - "speedup": 2.436764806371294, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 847872, - "linear_dense_total": 4718592, - "linear_nnz": 1634304, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1101312, - "linear_dense_total": 4718592, - "linear_nnz": 1887744, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 147456, - "linear_dense_total": 4718592, - "linear_nnz": 1130496, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 365568, - "linear_dense_total": 4718592, - "linear_nnz": 955392, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 1221120, - "linear_dense_total": 4718592, - "linear_nnz": 2400768, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1211904, - "linear_dense_total": 4718592, - "linear_nnz": 2588160, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 1279488, - "linear_dense_total": 4718592, - "linear_nnz": 2655744, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 1216512, - "linear_dense_total": 4718592, - "linear_nnz": 2199552, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 952320, - "linear_dense_total": 4718592, - "linear_nnz": 2131968, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 715776, - "linear_dense_total": 4718592, - "linear_nnz": 2092032, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 434688, - "linear_dense_total": 4718592, - "linear_nnz": 1417728, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 172032, - "linear_dense_total": 4718592, - "linear_nnz": 1155072, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 22248960, - "linear_sparsity": 67.85111860795455, - "linear_total": 69206016, - "nnz": 46161559, - "total": 93149186, - "total_sparsity": 50.443411282198426 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl20_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-22132": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 12, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 2, - 4, - 5, - 6, - 7, - 9, - 11 - ], - "1": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9 - ], - "10": [ - 1, - 4, - 5, - 6, - 7, - 8, - 9 - ], - "11": [ - 0, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 11 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 7, - 8, - 10, - 11 - ], - "3": [ - 2, - 3, - 4, - 6, - 7, - 10 - ], - "4": [ - 0, - 1, - 2, - 6, - 7, - 8, - 9, - 11 - ], - "5": [ - 0, - 1, - 2, - 5, - 6, - 7, - 11 - ], - "6": [ - 0, - 2, - 3, - 4, - 6, - 7, - 10 - ], - "7": [ - 1, - 3, - 6, - 7, - 11 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8 - ], - "9": [ - 1, - 3, - 4, - 5, - 7, - 9, - 10 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.63765373699148, - "f1": 86.69392512957342 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test3/hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1/checkpoint-110000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 13.783753513336181, - "eval_elapsed_time": 20.85535095212981 - }, - "speedup": 2.799991523936488, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 482304, - "linear_dense_total": 4718592, - "linear_nnz": 1268736, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "1": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 706560, - "linear_dense_total": 4718592, - "linear_nnz": 1296384, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "10": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 121344, - "linear_dense_total": 4718592, - "linear_nnz": 1104384, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "11": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 215040, - "linear_dense_total": 4718592, - "linear_nnz": 804864, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "2": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 850944, - "linear_dense_total": 4718592, - "linear_nnz": 1440768, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "3": { - "linear_attention_nnz": 1179648, - "linear_attention_total": 1179648, - "linear_dense_nnz": 826368, - "linear_dense_total": 4718592, - "linear_nnz": 2006016, - "linear_total": 5898240, - "nnz": 768, - "total": 768 - }, - "4": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 923136, - "linear_dense_total": 4718592, - "linear_nnz": 1709568, - "linear_total": 5505024, - "nnz": 768, - "total": 768 - }, - "5": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 880128, - "linear_dense_total": 4718592, - "linear_nnz": 1863168, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "6": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 645120, - "linear_dense_total": 4718592, - "linear_nnz": 1628160, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "7": { - "linear_attention_nnz": 1376256, - "linear_attention_total": 1376256, - "linear_dense_nnz": 525312, - "linear_dense_total": 4718592, - "linear_nnz": 1901568, - "linear_total": 6094848, - "nnz": 768, - "total": 768 - }, - "8": { - "linear_attention_nnz": 589824, - "linear_attention_total": 589824, - "linear_dense_nnz": 333312, - "linear_dense_total": 4718592, - "linear_nnz": 923136, - "linear_total": 5308416, - "nnz": 768, - "total": 768 - }, - "9": { - "linear_attention_nnz": 983040, - "linear_attention_total": 983040, - "linear_dense_nnz": 113664, - "linear_dense_total": 4718592, - "linear_nnz": 1096704, - "linear_total": 5701632, - "nnz": 768, - "total": 768 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 17043456, - "linear_sparsity": 74.57844574780059, - "linear_total": 67043328, - "nnz": 40951962, - "total": 90984386, - "total_sparsity": 54.990121052199 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 4, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_hp_od-output__squad_test3_es-steps_nte20_ls250_est5000_rn-output__squad_test3_dpm-sigmoied_threshold:1d_alt_apme-sigmoied_threshold_aowd0_bm1_abr32_abc32_it0_fw10_r-l1_rfl40_dl0.25_dtnop-csarron__bert-base-uncased-squad-v1", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-47500": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 10, - 3, - 13, - 6 - ], - "13": [ - 2, - 10, - 4, - 12 - ], - "14": [ - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 11, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.74645222327341, - "f1": 90.16320537561052 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53850735473633, - "eval_elapsed_time": 44.58338421070948 - }, - "speedup": 1.0281280670181348, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 10, - 3, - 13, - 6 - ], - "13": [ - 2, - 10, - 4, - 12 - ], - "14": [ - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 11, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.62346263008514, - "f1": 90.10843526218638 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 37.30008307647705, - "eval_elapsed_time": 44.469506811816245 - }, - "speedup": 1.034699920808227, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1765376, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 794624, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 3616768, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 3653632, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 4386816, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 4325376, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 3792896, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 3303424, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 3356672, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 3252224, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2883584, - "linear_attention_total": 2883584, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 3205120, - "linear_total": 11272192, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 2367488, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 811008, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 1388544, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 866304, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 1230848, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1724416, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 991232, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 1077248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1394688, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1748992, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1710080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 2320384, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 53983232, - "linear_sparsity": 77.76137284017278, - "linear_total": 242745344, - "nnz": 85952121, - "total": 274806402, - "total_sparsity": 68.72266425583491 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-22500": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.399243140965, - "f1": 90.84270784891945 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6732879486084, - "eval_elapsed_time": 48.981834520120174 - }, - "speedup": 0.9261182619659336, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-25000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.20056764427625, - "f1": 90.73941291394593 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.50353849792481, - "eval_elapsed_time": 49.06402187002823 - }, - "speedup": 0.929906085171529, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25/checkpoint-27665": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.2100283822138, - "f1": 90.70141124860059 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.6272840423584, - "eval_elapsed_time": 49.02150737866759 - }, - "speedup": 0.9271417507348992, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 5, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.63576158940397, - "f1": 91.0266636723574 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85157574462891, - "eval_elapsed_time": 49.32021534908563 - }, - "speedup": 0.9221729963255725, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25_v3_f91.03/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 7, - 8, - 10, - 12, - 13 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 8, - 10 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 13 - ], - "13": [ - 10, - 2, - 3, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 5, - 6, - 7, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 13, - 15 - ], - "17": [ - 0, - 2, - 11, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 2, - 3, - 4, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 5, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 4, - 5, - 6, - 7, - 9, - 10, - 12, - 13, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 10, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "6": [ - 0, - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 84.65468306527909, - "f1": 91.01004624462917 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 41.85431317138672, - "eval_elapsed_time": 49.428419118281454 - }, - "speedup": 0.922112682803639, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 2408448, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1800192, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 5031936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4870144, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 4757504, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 5750784, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4921344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 4321280, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 4024320, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 3145728, - "linear_attention_total": 3145728, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 4243456, - "linear_total": 11534336, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 3260416, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 2574336, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1884160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 1406976, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 1243136, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 704512, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 1634304, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2734080, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2291712, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2660352, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2578432, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2822144, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2492416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 3297280, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 73713664, - "linear_sparsity": 69.16718064692982, - "linear_total": 239075328, - "nnz": 105691291, - "total": 271133698, - "total_sparsity": 61.01875503501597 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_10_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 12 - ], - "12": [ - 1, - 2, - 3, - 5, - 6, - 7, - 8, - 10, - 12, - 13, - 14 - ], - "13": [ - 2, - 3, - 4, - 10, - 11, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 11 - ], - "15": [ - 0, - 2, - 5, - 6, - 7, - 8, - 9, - 11, - 12 - ], - "16": [ - 3, - 6, - 8, - 10, - 12, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 12, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 1, - 2, - 3, - 4, - 5, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 15 - ], - "21": [ - 0, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "23": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 81.15421002838221, - "f1": 88.34901265417608 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40/checkpoint-221320", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 28.669108856201174, - "eval_elapsed_time": 35.70603838330135 - }, - "speedup": 1.3462013485997515, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 88064, - "linear_dense_total": 8388608, - "linear_nnz": 1136640, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 626688, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 442368, - "linear_dense_total": 8388608, - "linear_nnz": 2277376, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 462848, - "linear_dense_total": 8388608, - "linear_nnz": 2297856, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 557056, - "linear_dense_total": 8388608, - "linear_nnz": 1867776, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 507904, - "linear_dense_total": 8388608, - "linear_nnz": 3129344, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 2983936, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 278528, - "linear_dense_total": 8388608, - "linear_nnz": 2113536, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 2547712, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 2809856, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 141312, - "linear_dense_total": 8388608, - "linear_nnz": 2500608, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 137216, - "linear_dense_total": 8388608, - "linear_nnz": 1185792, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 90112, - "linear_dense_total": 8388608, - "linear_nnz": 352256, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 57344, - "linear_dense_total": 8388608, - "linear_nnz": 843776, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 827392, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 565248, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 888832, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 155648, - "linear_dense_total": 8388608, - "linear_nnz": 679936, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 143360, - "linear_dense_total": 8388608, - "linear_nnz": 667648, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 167936, - "linear_dense_total": 8388608, - "linear_nnz": 692224, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 212992, - "linear_dense_total": 8388608, - "linear_nnz": 475136, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 178176, - "linear_dense_total": 8388608, - "linear_nnz": 1226752, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 229376, - "linear_dense_total": 8388608, - "linear_nnz": 753664, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1048576, - "linear_attention_total": 1048576, - "linear_dense_nnz": 370688, - "linear_dense_total": 8388608, - "linear_nnz": 1419264, - "linear_total": 9437184, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 34869248, - "linear_sparsity": 84.90174517593644, - "linear_total": 230948864, - "nnz": 66825924, - "total": 263001282, - "total_sparsity": 74.59102727871874 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25/checkpoint-52500": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 1, - 2, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 6, - 7, - 8, - 10, - 12, - 15 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14 - ], - "13": [ - 2, - 3, - 4, - 10, - 11, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 9, - 11, - 13 - ], - "15": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 11, - 12 - ], - "16": [ - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 12, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 1, - 2, - 3, - 4, - 5, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "21": [ - 0, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 11, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.32734153263955, - "f1": 89.39825688878855 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-220000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.977725273132325, - "eval_elapsed_time": 37.05464425915852 - }, - "speedup": 1.2874356761138743, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 778240, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 956416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 3045376, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2588672, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2711552, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 3219456, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 2351104, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 2629632, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 3078144, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 2799616, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 1673216, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 974848, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 901120, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 1073152, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 1138688, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 1101824, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1120256, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1423360, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 1372160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1687552, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43026432, - "linear_sparsity": 81.15581946039036, - "linear_total": 228327424, - "nnz": 74986451, - "total": 260377922, - "total_sparsity": 71.20091810241884 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25/checkpoint-55000": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 1, - 2, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 6, - 7, - 8, - 10, - 12, - 15 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14 - ], - "13": [ - 2, - 3, - 4, - 10, - 11, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 9, - 11, - 13 - ], - "15": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 11, - 12 - ], - "16": [ - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 12, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 1, - 2, - 3, - 4, - 5, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "21": [ - 0, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 11, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.28003784295176, - "f1": 89.37602873453882 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-220000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.986146453857423, - "eval_elapsed_time": 37.03868922078982 - }, - "speedup": 1.287074118201884, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 778240, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 956416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 3045376, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2588672, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2711552, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 3219456, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 2351104, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 2629632, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 3078144, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 2799616, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 1673216, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 974848, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 901120, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 1073152, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 1138688, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 1101824, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1120256, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1423360, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 1372160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1687552, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43026432, - "linear_sparsity": 81.15581946039036, - "linear_total": 228327424, - "nnz": 74986451, - "total": 260377922, - "total_sparsity": 71.20091810241884 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25/checkpoint-55330": { - "config": { - "_name_or_path": "/home/lagunas/devel/hf/nn_pruning/nn_pruning/analysis/tmp_finetune", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "pruned_heads": { - "0": [ - 0, - 1, - 2, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "1": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "10": [ - 0, - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 14 - ], - "11": [ - 0, - 1, - 2, - 4, - 5, - 6, - 7, - 8, - 10, - 12, - 15 - ], - "12": [ - 2, - 3, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14 - ], - "13": [ - 2, - 3, - 4, - 10, - 11, - 12 - ], - "14": [ - 1, - 2, - 3, - 4, - 8, - 9, - 11, - 13 - ], - "15": [ - 0, - 1, - 2, - 5, - 6, - 7, - 8, - 9, - 11, - 12 - ], - "16": [ - 3, - 6, - 7, - 8, - 10, - 12, - 13, - 15 - ], - "17": [ - 0, - 2, - 4, - 11, - 12, - 15 - ], - "18": [ - 2, - 3, - 5, - 9, - 11, - 12, - 13 - ], - "19": [ - 0, - 1, - 2, - 3, - 4, - 5, - 9, - 10, - 11, - 13, - 15 - ], - "2": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 15 - ], - "20": [ - 0, - 1, - 3, - 4, - 5, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "21": [ - 0, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "22": [ - 0, - 1, - 2, - 3, - 4, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14 - ], - "23": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14 - ], - "3": [ - 0, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 13, - 14, - 15 - ], - "4": [ - 0, - 1, - 2, - 4, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "5": [ - 0, - 1, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "6": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 13, - 14, - 15 - ], - "7": [ - 0, - 1, - 2, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "8": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 8, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "9": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 11, - 12, - 13, - 15 - ] - }, - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.25165562913908, - "f1": 89.36914341970648 - }, - "source_checkpoint": "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-220000", - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 1, - "attention_block_rows": 1, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "topK", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "topK", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": 1, - "final_threshold": 0.5, - "final_warmup": 0, - "initial_ampere_temperature": 0.0, - "initial_threshold": 1.0, - "initial_warmup": 0, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "", - "regularization_final_lambda": 0 - }, - "speed": { - "cuda_eval_elapsed_time": 29.99512833404541, - "eval_elapsed_time": 37.09979658899829 - }, - "speedup": 1.2866887107652496, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 778240, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 956416, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1835008, - "linear_attention_total": 1835008, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 3045376, - "linear_total": 10223616, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2588672, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2711552, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 3219456, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1572864, - "linear_attention_total": 1572864, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 2351104, - "linear_total": 9961472, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 2097152, - "linear_attention_total": 2097152, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 2629632, - "linear_total": 10485760, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 2621440, - "linear_attention_total": 2621440, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 3078144, - "linear_total": 11010048, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 2359296, - "linear_attention_total": 2359296, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 2799616, - "linear_total": 10747904, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 1310720, - "linear_attention_total": 1310720, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 1673216, - "linear_total": 9699328, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 974848, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 899072, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 901120, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 708608, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 1073152, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 1138688, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 1101824, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 262144, - "linear_attention_total": 262144, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1120256, - "linear_total": 8650752, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1423360, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 524288, - "linear_attention_total": 524288, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 1372160, - "linear_total": 8912896, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 786432, - "linear_attention_total": 786432, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1687552, - "linear_total": 9175040, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 43026432, - "linear_sparsity": 81.15581946039036, - "linear_total": 228327424, - "nnz": 74986451, - "total": 260377922, - "total_sparsity": 71.20091810241884 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 2500, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 10, - "optimize_model_before_eval": "disabled", - "output_dir": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 128, - "per_device_train_batch_size": 16, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "/data_2to/devel_data/nn_pruning/output/squad_test_final_fine_tune/fine_tuned_large_regu_40_d0.25", - "save_steps": 2500, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 10, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-215000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.33680227057711, - "f1": 89.04761607630476 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.50764268493653, - "eval_elapsed_time": 44.93039320781827 - }, - "speedup": 1.0289741034797428, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 974848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1167360, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 306176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 576512, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1714176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2709504, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1875968, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2908160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1832960, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3074048, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2155520, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3335168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1942528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2851840, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2079744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2761728, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1843200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2316288, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1582080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1950720, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1435648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1757184, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 717824, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 988160, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 297984, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 584704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 334848, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 447488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 358400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 436224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 134144, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 214016, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 293888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 834560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1248256, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 381952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 848896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 406528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 959488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 522240, - "linear_attention_total": 4194304, - "linear_dense_nnz": 608256, - "linear_dense_total": 8388608, - "linear_nnz": 1130496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 771072, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 414720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 661504, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1091584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1839104, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36684800, - "linear_sparsity": 87.85230848524306, - "linear_total": 301989888, - "nnz": 68649433, - "total": 334094338, - "total_sparsity": 79.45208128609471 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-220000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.13812677388836, - "f1": 89.03656646065757 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.54432637023926, - "eval_elapsed_time": 44.93571184715256 - }, - "speedup": 1.0279687168915141, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 989184, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1181696, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 323584, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 593920, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1745920, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2741248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1902592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2934784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1782784, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3023872, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2147328, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3326976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1917952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2827264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2049024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2731008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1820672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2293760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1562624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1931264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1390592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1712128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 286720, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 573440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 801792, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1215488, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 863232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 405504, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 520192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1124352, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 764928, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1203200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 423936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1083392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1070080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1817600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36464640, - "linear_sparsity": 87.92521158854166, - "linear_total": 301989888, - "nnz": 68429014, - "total": 334094338, - "total_sparsity": 79.51805636406804 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 82.30842005676443, - "f1": 89.04987146464723 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 37.53598588562012, - "eval_elapsed_time": 44.935436787083745 - }, - "speedup": 1.028197131226982, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 978944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 192512, - "linear_dense_total": 8388608, - "linear_nnz": 1171456, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 319488, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 589824, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1754112, - "linear_attention_total": 4194304, - "linear_dense_nnz": 995328, - "linear_dense_total": 8388608, - "linear_nnz": 2749440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1922048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1032192, - "linear_dense_total": 8388608, - "linear_nnz": 2954240, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1775616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1241088, - "linear_dense_total": 8388608, - "linear_nnz": 3016704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2149376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1179648, - "linear_dense_total": 8388608, - "linear_nnz": 3329024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1954816, - "linear_attention_total": 4194304, - "linear_dense_nnz": 909312, - "linear_dense_total": 8388608, - "linear_nnz": 2864128, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 2065408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 681984, - "linear_dense_total": 8388608, - "linear_nnz": 2747392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1823744, - "linear_attention_total": 4194304, - "linear_dense_nnz": 473088, - "linear_dense_total": 8388608, - "linear_nnz": 2296832, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1558528, - "linear_attention_total": 4194304, - "linear_dense_nnz": 368640, - "linear_dense_total": 8388608, - "linear_nnz": 1927168, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1356800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 1678336, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 688128, - "linear_attention_total": 4194304, - "linear_dense_nnz": 270336, - "linear_dense_total": 8388608, - "linear_nnz": 958464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 293888, - "linear_attention_total": 4194304, - "linear_dense_nnz": 286720, - "linear_dense_total": 8388608, - "linear_nnz": 580608, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 439296, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 344064, - "linear_attention_total": 4194304, - "linear_dense_nnz": 77824, - "linear_dense_total": 8388608, - "linear_nnz": 421888, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 129024, - "linear_attention_total": 4194304, - "linear_dense_nnz": 79872, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 116736, - "linear_attention_total": 4194304, - "linear_dense_nnz": 182272, - "linear_dense_total": 8388608, - "linear_nnz": 299008, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 795648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 413696, - "linear_dense_total": 8388608, - "linear_nnz": 1209344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 395264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 466944, - "linear_dense_total": 8388608, - "linear_nnz": 862208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 392192, - "linear_attention_total": 4194304, - "linear_dense_nnz": 552960, - "linear_dense_total": 8388608, - "linear_nnz": 945152, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 523264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 604160, - "linear_dense_total": 8388608, - "linear_nnz": 1127424, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 784384, - "linear_attention_total": 4194304, - "linear_dense_nnz": 438272, - "linear_dense_total": 8388608, - "linear_nnz": 1222656, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 416768, - "linear_attention_total": 4194304, - "linear_dense_nnz": 659456, - "linear_dense_total": 8388608, - "linear_nnz": 1076224, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 1069056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 747520, - "linear_dense_total": 8388608, - "linear_nnz": 1816576, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 36492288, - "linear_sparsity": 87.91605631510416, - "linear_total": 301989888, - "nnz": 68456822, - "total": 334094338, - "total_sparsity": 79.50973296650122 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 5000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu-10", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu-10", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu-10", - "save_steps": 5000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-210000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.78429517502366, - "f1": 90.32458147221426 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.496326583862306, - "eval_elapsed_time": 49.08256564009935 - }, - "speedup": 0.9300676995438012, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 783360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 835584, - "linear_dense_total": 8388608, - "linear_nnz": 1618944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 326656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1275904, - "linear_dense_total": 8388608, - "linear_nnz": 1602560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1636352, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2410496, - "linear_dense_total": 8388608, - "linear_nnz": 4046848, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1575936, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2510848, - "linear_dense_total": 8388608, - "linear_nnz": 4086784, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1203200, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2660352, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2030592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2605056, - "linear_dense_total": 8388608, - "linear_nnz": 4635648, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1785856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4085760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1946624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3646464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1647616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3050496, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1538048, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2635776, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1169408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2070528, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 607232, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1346560, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1359872, - "linear_dense_total": 8388608, - "linear_nnz": 1665024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 396288, - "linear_attention_total": 4194304, - "linear_dense_nnz": 358400, - "linear_dense_total": 8388608, - "linear_nnz": 754688, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 284672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 479232, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 111616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 323584, - "linear_dense_total": 8388608, - "linear_nnz": 435200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 626688, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2312192, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 369664, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2137088, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 463872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1873920, - "linear_dense_total": 8388608, - "linear_nnz": 2337792, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 294912, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2349056, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 613376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2386944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 208896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1968128, - "linear_dense_total": 8388608, - "linear_nnz": 2177024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 923648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2910208, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56885248, - "linear_sparsity": 81.16319444444444, - "linear_total": 301989888, - "nnz": 88857851, - "total": 334094338, - "total_sparsity": 73.40336518962498 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_10_d0.25/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 83.66130558183538, - "f1": 90.22195941338013 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 10 - }, - "speed": { - "cuda_eval_elapsed_time": 41.275371505737304, - "eval_elapsed_time": 48.98561626393348 - }, - "speedup": 0.9350465325310627, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 766976, - "linear_attention_total": 4194304, - "linear_dense_nnz": 831488, - "linear_dense_total": 8388608, - "linear_nnz": 1598464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 338944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1273856, - "linear_dense_total": 8388608, - "linear_nnz": 1612800, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 1596416, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2408448, - "linear_dense_total": 8388608, - "linear_nnz": 4004864, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 1615872, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2508800, - "linear_dense_total": 8388608, - "linear_nnz": 4124672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 1205248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2658304, - "linear_dense_total": 8388608, - "linear_nnz": 3863552, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 2006016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2603008, - "linear_dense_total": 8388608, - "linear_nnz": 4609024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1718272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2299904, - "linear_dense_total": 8388608, - "linear_nnz": 4018176, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1935360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1699840, - "linear_dense_total": 8388608, - "linear_nnz": 3635200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 1612800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1402880, - "linear_dense_total": 8388608, - "linear_nnz": 3015680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 1502208, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1097728, - "linear_dense_total": 8388608, - "linear_nnz": 2599936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 1167360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 2068480, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 601088, - "linear_attention_total": 4194304, - "linear_dense_nnz": 739328, - "linear_dense_total": 8388608, - "linear_nnz": 1340416, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 305152, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1357824, - "linear_dense_total": 8388608, - "linear_nnz": 1662976, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 364544, - "linear_attention_total": 4194304, - "linear_dense_nnz": 356352, - "linear_dense_total": 8388608, - "linear_nnz": 720896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 274432, - "linear_attention_total": 4194304, - "linear_dense_nnz": 194560, - "linear_dense_total": 8388608, - "linear_nnz": 468992, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 70656, - "linear_attention_total": 4194304, - "linear_dense_nnz": 180224, - "linear_dense_total": 8388608, - "linear_nnz": 250880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 102400, - "linear_attention_total": 4194304, - "linear_dense_nnz": 321536, - "linear_dense_total": 8388608, - "linear_nnz": 423936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 621568, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1685504, - "linear_dense_total": 8388608, - "linear_nnz": 2307072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 377856, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1767424, - "linear_dense_total": 8388608, - "linear_nnz": 2145280, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 460800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1871872, - "linear_dense_total": 8388608, - "linear_nnz": 2332672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 309248, - "linear_attention_total": 4194304, - "linear_dense_nnz": 2054144, - "linear_dense_total": 8388608, - "linear_nnz": 2363392, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 583680, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1773568, - "linear_dense_total": 8388608, - "linear_nnz": 2357248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 215040, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1966080, - "linear_dense_total": 8388608, - "linear_nnz": 2181120, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 916480, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1986560, - "linear_dense_total": 8388608, - "linear_nnz": 2903040, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 56608768, - "linear_sparsity": 81.25474717881944, - "linear_total": 301989888, - "nnz": 88581359, - "total": 334094338, - "total_sparsity": 73.4861238504437 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_10_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 78.92147587511826, - "f1": 86.66302391758462 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 1.0, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 28.611265159606933, - "eval_elapsed_time": 36.00721236690879 - }, - "speedup": 1.3489229780673324, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 668672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 88064, - "linear_dense_total": 8388608, - "linear_nnz": 756736, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 232448, - "linear_attention_total": 4194304, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 334848, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 864256, - "linear_attention_total": 4194304, - "linear_dense_nnz": 442368, - "linear_dense_total": 8388608, - "linear_nnz": 1306624, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 985088, - "linear_attention_total": 4194304, - "linear_dense_nnz": 462848, - "linear_dense_total": 8388608, - "linear_nnz": 1447936, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 726016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 557056, - "linear_dense_total": 8388608, - "linear_nnz": 1283072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1306624, - "linear_attention_total": 4194304, - "linear_dense_nnz": 507904, - "linear_dense_total": 8388608, - "linear_nnz": 1814528, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 1107968, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 1470464, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 1074176, - "linear_attention_total": 4194304, - "linear_dense_nnz": 278528, - "linear_dense_total": 8388608, - "linear_nnz": 1352704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 951296, - "linear_attention_total": 4194304, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 1139712, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 795648, - "linear_attention_total": 4194304, - "linear_dense_nnz": 188416, - "linear_dense_total": 8388608, - "linear_nnz": 984064, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 706560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 141312, - "linear_dense_total": 8388608, - "linear_nnz": 847872, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 290816, - "linear_attention_total": 4194304, - "linear_dense_nnz": 137216, - "linear_dense_total": 8388608, - "linear_nnz": 428032, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 146432, - "linear_attention_total": 4194304, - "linear_dense_nnz": 90112, - "linear_dense_total": 8388608, - "linear_nnz": 236544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 186368, - "linear_attention_total": 4194304, - "linear_dense_nnz": 57344, - "linear_dense_total": 8388608, - "linear_nnz": 243712, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 194560, - "linear_attention_total": 4194304, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 235520, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 46080, - "linear_attention_total": 4194304, - "linear_dense_nnz": 40960, - "linear_dense_total": 8388608, - "linear_nnz": 87040, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 54272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 102400, - "linear_dense_total": 8388608, - "linear_nnz": 156672, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 359424, - "linear_attention_total": 4194304, - "linear_dense_nnz": 155648, - "linear_dense_total": 8388608, - "linear_nnz": 515072, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 349184, - "linear_attention_total": 4194304, - "linear_dense_nnz": 143360, - "linear_dense_total": 8388608, - "linear_nnz": 492544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 270336, - "linear_attention_total": 4194304, - "linear_dense_nnz": 167936, - "linear_dense_total": 8388608, - "linear_nnz": 438272, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 196608, - "linear_attention_total": 4194304, - "linear_dense_nnz": 212992, - "linear_dense_total": 8388608, - "linear_nnz": 409600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 494592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 178176, - "linear_dense_total": 8388608, - "linear_nnz": 672768, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 173056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 229376, - "linear_dense_total": 8388608, - "linear_nnz": 402432, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 630784, - "linear_attention_total": 4194304, - "linear_dense_nnz": 370688, - "linear_dense_total": 8388608, - "linear_nnz": 1001472, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 18058240, - "linear_sparsity": 94.02025010850694, - "linear_total": 301989888, - "nnz": 50008420, - "total": 334094338, - "total_sparsity": 85.0316469595483 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-220000": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.59602649006622, - "f1": 87.8561484925226 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 29.83378296661377, - "eval_elapsed_time": 37.31617963500321 - }, - "speedup": 1.2936473074353696, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 340992, - "linear_attention_total": 4194304, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 594944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 141312, - "linear_attention_total": 4194304, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 573440, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 832512, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 2042880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 765952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2043904, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 720896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2121728, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1234944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 2699264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 879616, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 2001920, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 917504, - "linear_attention_total": 4194304, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 1695744, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 793600, - "linear_attention_total": 4194304, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 1326080, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 726016, - "linear_attention_total": 4194304, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 1182720, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 656384, - "linear_attention_total": 4194304, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 1096704, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 281600, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 644096, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 238592, - "linear_attention_total": 4194304, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 689152, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 137216, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 321536, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 175104, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 287744, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 54272, - "linear_attention_total": 4194304, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 168960, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 24576, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 320512, - "linear_attention_total": 4194304, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 869376, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 332800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 947200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 147456, - "linear_attention_total": 4194304, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 987136, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 166912, - "linear_attention_total": 4194304, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1025024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 376832, - "linear_attention_total": 4194304, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1013760, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 145408, - "linear_attention_total": 4194304, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 993280, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 466944, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1368064, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26903552, - "linear_sparsity": 91.09124077690971, - "linear_total": 301989888, - "nnz": 58856371, - "total": 334094338, - "total_sparsity": 82.38330785480117 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - }, - "/data_2to/devel_data/nn_pruning/output/squad_test_large/large_regu_40_d0.25/checkpoint-221320": { - "config": { - "_name_or_path": "bert-large-uncased", - "architectures": [ - "BertForQuestionAnswering" - ], - "attention_probs_dropout_prob": 0.1, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 1024, - "initializer_range": 0.02, - "intermediate_size": 4096, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 16, - "num_hidden_layers": 24, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "type_vocab_size": 2, - "vocab_size": 30522 - }, - "eval_metrics": { - "exact_match": 80.4635761589404, - "f1": 87.71992570037945 - }, - "sparse_args": { - "ampere_pruning_method": "disabled", - "attention_block_cols": 32, - "attention_block_rows": 32, - "attention_lambda": 1.0, - "attention_output_with_dense": 0, - "attention_pruning_method": "sigmoied_threshold", - "bias_mask": true, - "dense_block_cols": 1, - "dense_block_rows": 1, - "dense_lambda": 0.25, - "dense_pruning_method": "sigmoied_threshold:1d_alt", - "distil_alpha_ce": 0.1, - "distil_alpha_teacher": 0.9, - "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad", - "distil_temperature": 2.0, - "final_ampere_temperature": 20.0, - "final_finetune": false, - "final_threshold": 0.1, - "final_warmup": 10, - "initial_ampere_temperature": 0.0, - "initial_threshold": 0, - "initial_warmup": 1, - "mask_init": "constant", - "mask_scale": 0.0, - "mask_scores_learning_rate": 0.01, - "regularization": "l1", - "regularization_final_lambda": 40 - }, - "speed": { - "cuda_eval_elapsed_time": 29.83577773284912, - "eval_elapsed_time": 37.33651804598048 - }, - "speedup": 1.293560816511874, - "stats": { - "layers": { - "0": { - "linear_attention_nnz": 365568, - "linear_attention_total": 4194304, - "linear_dense_nnz": 253952, - "linear_dense_total": 8388608, - "linear_nnz": 619520, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "1": { - "linear_attention_nnz": 137216, - "linear_attention_total": 4194304, - "linear_dense_nnz": 432128, - "linear_dense_total": 8388608, - "linear_nnz": 569344, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "10": { - "linear_attention_nnz": 826368, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1210368, - "linear_dense_total": 8388608, - "linear_nnz": 2036736, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "11": { - "linear_attention_nnz": 764928, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1277952, - "linear_dense_total": 8388608, - "linear_nnz": 2042880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "12": { - "linear_attention_nnz": 737280, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1400832, - "linear_dense_total": 8388608, - "linear_nnz": 2138112, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "13": { - "linear_attention_nnz": 1224704, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1464320, - "linear_dense_total": 8388608, - "linear_nnz": 2689024, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "14": { - "linear_attention_nnz": 869376, - "linear_attention_total": 4194304, - "linear_dense_nnz": 1122304, - "linear_dense_total": 8388608, - "linear_nnz": 1991680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "15": { - "linear_attention_nnz": 924672, - "linear_attention_total": 4194304, - "linear_dense_nnz": 778240, - "linear_dense_total": 8388608, - "linear_nnz": 1702912, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "16": { - "linear_attention_nnz": 782336, - "linear_attention_total": 4194304, - "linear_dense_nnz": 532480, - "linear_dense_total": 8388608, - "linear_nnz": 1314816, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "17": { - "linear_attention_nnz": 720896, - "linear_attention_total": 4194304, - "linear_dense_nnz": 456704, - "linear_dense_total": 8388608, - "linear_nnz": 1177600, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "18": { - "linear_attention_nnz": 655360, - "linear_attention_total": 4194304, - "linear_dense_nnz": 440320, - "linear_dense_total": 8388608, - "linear_nnz": 1095680, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "19": { - "linear_attention_nnz": 288768, - "linear_attention_total": 4194304, - "linear_dense_nnz": 362496, - "linear_dense_total": 8388608, - "linear_nnz": 651264, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "2": { - "linear_attention_nnz": 242688, - "linear_attention_total": 4194304, - "linear_dense_nnz": 450560, - "linear_dense_total": 8388608, - "linear_nnz": 693248, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "20": { - "linear_attention_nnz": 141312, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 325632, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "21": { - "linear_attention_nnz": 173056, - "linear_attention_total": 4194304, - "linear_dense_nnz": 112640, - "linear_dense_total": 8388608, - "linear_nnz": 285696, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "22": { - "linear_attention_nnz": 55296, - "linear_attention_total": 4194304, - "linear_dense_nnz": 114688, - "linear_dense_total": 8388608, - "linear_nnz": 169984, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "23": { - "linear_attention_nnz": 24576, - "linear_attention_total": 4194304, - "linear_dense_nnz": 184320, - "linear_dense_total": 8388608, - "linear_nnz": 208896, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "3": { - "linear_attention_nnz": 327680, - "linear_attention_total": 4194304, - "linear_dense_nnz": 548864, - "linear_dense_total": 8388608, - "linear_nnz": 876544, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "4": { - "linear_attention_nnz": 332800, - "linear_attention_total": 4194304, - "linear_dense_nnz": 614400, - "linear_dense_total": 8388608, - "linear_nnz": 947200, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "5": { - "linear_attention_nnz": 139264, - "linear_attention_total": 4194304, - "linear_dense_nnz": 839680, - "linear_dense_total": 8388608, - "linear_nnz": 978944, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "6": { - "linear_attention_nnz": 165888, - "linear_attention_total": 4194304, - "linear_dense_nnz": 858112, - "linear_dense_total": 8388608, - "linear_nnz": 1024000, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "7": { - "linear_attention_nnz": 381952, - "linear_attention_total": 4194304, - "linear_dense_nnz": 636928, - "linear_dense_total": 8388608, - "linear_nnz": 1018880, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "8": { - "linear_attention_nnz": 134144, - "linear_attention_total": 4194304, - "linear_dense_nnz": 847872, - "linear_dense_total": 8388608, - "linear_nnz": 982016, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "9": { - "linear_attention_nnz": 489472, - "linear_attention_total": 4194304, - "linear_dense_nnz": 901120, - "linear_dense_total": 8388608, - "linear_nnz": 1390592, - "linear_total": 12582912, - "nnz": 1024, - "total": 1024 - }, - "null": { - "nnz": 2, - "total": 2 - } - }, - "linear_nnz": 26931200, - "linear_sparsity": 91.08208550347221, - "linear_total": 301989888, - "nnz": 58884211, - "total": 334094338, - "total_sparsity": 82.37497487910136 - }, - "training_args": { - "adam_beta1": 0.9, - "adam_beta2": 0.999, - "adam_epsilon": 1e-08, - "dataloader_drop_last": false, - "dataloader_num_workers": 0, - "debug": false, - "disable_tqdm": false, - "do_eval": 1, - "do_predict": false, - "do_train": 1, - "eval_accumulation_steps": null, - "eval_steps": 10000, - "evaluation_strategy": "steps", - "fp16": false, - "fp16_opt_level": "O1", - "gradient_accumulation_steps": 1, - "greater_is_better": null, - "ignore_data_skip": false, - "label_names": null, - "learning_rate": 3e-05, - "load_best_model_at_end": false, - "local_rank": -1, - "logging_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "logging_first_step": false, - "logging_steps": 250, - "max_grad_norm": 1.0, - "max_steps": -1, - "metric_for_best_model": null, - "model_parallel": false, - "no_cuda": false, - "num_train_epochs": 20, - "optimize_model_before_eval": "disabled", - "output_dir": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "overwrite_output_dir": 1, - "past_index": -1, - "per_device_eval_batch_size": 8, - "per_device_train_batch_size": 8, - "per_gpu_eval_batch_size": null, - "per_gpu_train_batch_size": null, - "prediction_loss_only": false, - "remove_unused_columns": true, - "run_name": "output/squad_test_large/squad_test_large_regu_40_d0.25", - "save_steps": 10000, - "save_total_limit": 50, - "seed": 17, - "tpu_metrics_debug": false, - "tpu_num_cores": null, - "warmup_steps": 5400, - "weight_decay": 0.0 - } - } - } -} \ No newline at end of file